diff --git a/lama_cleaner/const.py b/lama_cleaner/const.py index 14c0a93..a54c847 100644 --- a/lama_cleaner/const.py +++ b/lama_cleaner/const.py @@ -60,6 +60,7 @@ SD_CONTROLNET_CHOICES = [ "control_v11p_sd15_canny", "control_v11p_sd15_openpose", "control_v11p_sd15_inpaint", + "control_v11f1p_sd15_depth" ] SD_LOCAL_MODEL_HELP = """ diff --git a/lama_cleaner/model/controlnet.py b/lama_cleaner/model/controlnet.py index fa2060c..b8b647f 100644 --- a/lama_cleaner/model/controlnet.py +++ b/lama_cleaner/model/controlnet.py @@ -68,13 +68,28 @@ def load_from_local_model( logger.info(f"Converting {local_model_path} to diffusers controlnet pipeline") - pipe = download_from_original_stable_diffusion_ckpt( - local_model_path, - num_in_channels=4 if is_native_control_inpaint else 9, - from_safetensors=local_model_path.endswith("safetensors"), - device="cpu", - load_safety_checker=False, - ) + try: + pipe = download_from_original_stable_diffusion_ckpt( + local_model_path, + num_in_channels=4 if is_native_control_inpaint else 9, + from_safetensors=local_model_path.endswith("safetensors"), + device="cpu", + load_safety_checker=False, + ) + except Exception as e: + err_msg = str(e) + logger.exception(e) + if is_native_control_inpaint and "[320, 9, 3, 3]" in err_msg: + logger.error( + "control_v11p_sd15_inpaint method requires normal SD model, not inpainting SD model" + ) + if not is_native_control_inpaint and "[320, 4, 3, 3]" in err_msg: + logger.error( + f"{controlnet.config['_name_or_path']} method requires inpainting SD model, " + f"you can convert any SD model to inpainting model in AUTO1111: \n" + f"https://www.reddit.com/r/StableDiffusion/comments/zyi24j/how_to_turn_any_model_into_an_inpainting_model/" + ) + exit(-1) inpaint_pipe = pipe_class( vae=pipe.vae, @@ -203,7 +218,7 @@ class ControlNet(DiffusionInpaintModel): negative_prompt=config.negative_prompt, generator=torch.manual_seed(config.sd_seed), output_type="np.array", - callback=self.callback + callback=self.callback, ).images[0] else: if "canny" in self.sd_controlnet_method: @@ -219,6 +234,17 @@ class ControlNet(DiffusionInpaintModel): processor = OpenposeDetector.from_pretrained("lllyasviel/ControlNet") control_image = processor(image, hand_and_face=True) + elif "depth" in self.sd_controlnet_method: + from transformers import pipeline + + depth_estimator = pipeline("depth-estimation") + depth_image = depth_estimator(PIL.Image.fromarray(image))["depth"] + depth_image = np.array(depth_image) + depth_image = depth_image[:, :, None] + depth_image = np.concatenate( + [depth_image, depth_image, depth_image], axis=2 + ) + control_image = PIL.Image.fromarray(depth_image) else: raise NotImplementedError( f"{self.sd_controlnet_method} not implemented" diff --git a/lama_cleaner/schema.py b/lama_cleaner/schema.py index 0be9f47..233fe26 100644 --- a/lama_cleaner/schema.py +++ b/lama_cleaner/schema.py @@ -96,5 +96,5 @@ class Config(BaseModel): p2p_guidance_scale: float = 7.5 # ControlNet - controlnet_conditioning_scale: float = 1.0 + controlnet_conditioning_scale: float = 0.4 controlnet_method: str = "control_v11p_sd15_canny" diff --git a/lama_cleaner/tests/test_controlnet.py b/lama_cleaner/tests/test_controlnet.py index 224dd73..eb717c8 100644 --- a/lama_cleaner/tests/test_controlnet.py +++ b/lama_cleaner/tests/test_controlnet.py @@ -44,7 +44,21 @@ def test_runway_sd_1_5( sd_cpu_textencoder=cpu_textencoder, sd_controlnet_method=sd_controlnet_method, ) - cfg = get_config(strategy, prompt="a fox sitting on a bench", sd_steps=sd_steps) + + controlnet_conditioning_scale = { + "control_v11p_sd15_canny": 0.4, + "control_v11p_sd15_openpose": 0.4, + "control_v11p_sd15_inpaint": 1.0, + "control_v11f1p_sd15_depth": 1.0, + }[sd_controlnet_method] + + cfg = get_config( + strategy, + prompt="a fox sitting on a bench", + sd_steps=sd_steps, + controlnet_conditioning_scale=controlnet_conditioning_scale, + controlnet_method=sd_controlnet_method, + ) cfg.sd_sampler = sampler name = f"device_{sd_device}_{sampler}_cpu_textencoder_disable_nsfw" @@ -85,6 +99,7 @@ def test_local_file_path(sd_device, sampler): HDStrategy.ORIGINAL, prompt="a fox sitting on a bench", sd_steps=sd_steps, + controlnet_method="control_v11p_sd15_canny", ) cfg.sd_sampler = sampler @@ -126,6 +141,7 @@ def test_local_file_path_controlnet_native_inpainting(sd_device, sampler): sd_steps=sd_steps, controlnet_conditioning_scale=1.0, sd_strength=1.0, + controlnet_method="control_v11p_sd15_inpaint", ) cfg.sd_sampler = sampler