From 5da47ee035b4c3a973a259323d622d2b6d396ffc Mon Sep 17 00:00:00 2001 From: Qing Date: Mon, 8 Jan 2024 21:49:18 +0800 Subject: [PATCH] add back enable_attention_slicing for mps device --- iopaint/model/controlnet.py | 3 +++ iopaint/model/instruct_pix2pix.py | 2 ++ iopaint/model/kandinsky.py | 2 ++ iopaint/model/paint_by_example.py | 3 +++ iopaint/model/sd.py | 6 +++++- iopaint/model/sdxl.py | 5 +++++ iopaint/model/utils.py | 1 - 7 files changed, 20 insertions(+), 2 deletions(-) diff --git a/iopaint/model/controlnet.py b/iopaint/model/controlnet.py index 0b0cb33..45836de 100644 --- a/iopaint/model/controlnet.py +++ b/iopaint/model/controlnet.py @@ -96,6 +96,9 @@ class ControlNet(DiffusionInpaintModel): **model_kwargs, ) + if torch.backends.mps.is_available(): + self.model.enable_attention_slicing() + if kwargs.get("cpu_offload", False) and use_gpu: logger.info("Enable sequential cpu offload") self.model.enable_sequential_cpu_offload(gpu_id=0) diff --git a/iopaint/model/instruct_pix2pix.py b/iopaint/model/instruct_pix2pix.py index 697f932..ec69927 100644 --- a/iopaint/model/instruct_pix2pix.py +++ b/iopaint/model/instruct_pix2pix.py @@ -34,6 +34,8 @@ class InstructPix2Pix(DiffusionInpaintModel): self.model = StableDiffusionInstructPix2PixPipeline.from_pretrained( self.name, variant="fp16", torch_dtype=torch_dtype, **model_kwargs ) + if torch.backends.mps.is_available(): + self.model.enable_attention_slicing() if kwargs.get("cpu_offload", False) and use_gpu: logger.info("Enable sequential cpu offload") diff --git a/iopaint/model/kandinsky.py b/iopaint/model/kandinsky.py index 4419921..9e66b0b 100644 --- a/iopaint/model/kandinsky.py +++ b/iopaint/model/kandinsky.py @@ -26,6 +26,8 @@ class Kandinsky(DiffusionInpaintModel): self.model = AutoPipelineForInpainting.from_pretrained( self.name, **model_kwargs ).to(device) + if torch.backends.mps.is_available(): + self.model.enable_attention_slicing() self.callback = kwargs.pop("callback", None) diff --git a/iopaint/model/paint_by_example.py b/iopaint/model/paint_by_example.py index 18207e4..8e0abee 100644 --- a/iopaint/model/paint_by_example.py +++ b/iopaint/model/paint_by_example.py @@ -32,6 +32,9 @@ class PaintByExample(DiffusionInpaintModel): self.name, torch_dtype=torch_dtype, **model_kwargs ) + if torch.backends.mps.is_available(): + self.model.enable_attention_slicing() + # TODO: gpu_id if kwargs.get("cpu_offload", False) and use_gpu: self.model.image_encoder = self.model.image_encoder.to(device) diff --git a/iopaint/model/sd.py b/iopaint/model/sd.py index 403e9f6..6a11b32 100644 --- a/iopaint/model/sd.py +++ b/iopaint/model/sd.py @@ -29,7 +29,6 @@ class SD(DiffusionInpaintModel): requires_safety_checker=False, ) ) - use_gpu = device == torch.device("cuda") and torch.cuda.is_available() torch_dtype = torch.float16 if use_gpu and fp16 else torch.float32 @@ -51,6 +50,11 @@ class SD(DiffusionInpaintModel): **model_kwargs, ) + if torch.backends.mps.is_available(): + # MPS: Recommended RAM < 64 GB https://huggingface.co/docs/diffusers/optimization/mps + # CUDA: Don't enable attention slicing if you're already using `scaled_dot_product_attention` (SDPA) from PyTorch 2.0 or xFormers. https://huggingface.co/docs/diffusers/v0.25.0/en/api/pipelines/stable_diffusion/image_variation#diffusers.StableDiffusionImageVariationPipeline.enable_attention_slicing + self.model.enable_attention_slicing() + if kwargs.get("cpu_offload", False) and use_gpu: logger.info("Enable sequential cpu offload") self.model.enable_sequential_cpu_offload(gpu_id=0) diff --git a/iopaint/model/sdxl.py b/iopaint/model/sdxl.py index 6ec13da..6778be7 100644 --- a/iopaint/model/sdxl.py +++ b/iopaint/model/sdxl.py @@ -50,6 +50,11 @@ class SDXL(DiffusionInpaintModel): variant="fp16", ) + if torch.backends.mps.is_available(): + # MPS: Recommended RAM < 64 GB https://huggingface.co/docs/diffusers/optimization/mps + # CUDA: Don't enable attention slicing if you're already using `scaled_dot_product_attention` (SDPA) from PyTorch 2.0 or xFormers. https://huggingface.co/docs/diffusers/v0.25.0/en/api/pipelines/stable_diffusion/image_variation#diffusers.StableDiffusionImageVariationPipeline.enable_attention_slicing + self.model.enable_attention_slicing() + if kwargs.get("cpu_offload", False) and use_gpu: logger.info("Enable sequential cpu offload") self.model.enable_sequential_cpu_offload(gpu_id=0) diff --git a/iopaint/model/utils.py b/iopaint/model/utils.py index 178b71a..99a4404 100644 --- a/iopaint/model/utils.py +++ b/iopaint/model/utils.py @@ -24,7 +24,6 @@ from diffusers import ( KDPM2AncestralDiscreteScheduler, HeunDiscreteScheduler, ) -from diffusers.configuration_utils import FrozenDict from loguru import logger from iopaint.schema import SDSampler