From b8a030f83a4b5cf1f070ddd457209b29db7fedb3 Mon Sep 17 00:00:00 2001
From: Qing
Date: Wed, 15 Nov 2023 17:18:32 +0800
Subject: [PATCH] fix outpainting image padding

---
 lama_cleaner/model/g_diffuser_bot.py | 59 +++++++++++-----------------
 1 file changed, 22 insertions(+), 37 deletions(-)

diff --git a/lama_cleaner/model/g_diffuser_bot.py b/lama_cleaner/model/g_diffuser_bot.py
index bf7ce2a..a4147af 100644
--- a/lama_cleaner/model/g_diffuser_bot.py
+++ b/lama_cleaner/model/g_diffuser_bot.py
@@ -85,59 +85,44 @@ def image_blur(data, std=3.14, mode="linear"):
     return np.real(convolve(data, kernel / np.sqrt(np.sum(kernel * kernel))))
 
 
-def soften_mask(np_rgba_image, softness, space):
+def soften_mask(mask_img, softness, space):
     if softness == 0:
-        return np_rgba_image
+        return mask_img
     softness = min(softness, 1.0)
     space = np.clip(space, 0.0, 1.0)
-    original_max_opacity = np.max(np_rgba_image[:, :, 3])
-    out_mask = np_rgba_image[:, :, 3] <= 0.0
-    blurred_mask = image_blur(np_rgba_image[:, :, 3], 3.5 / softness, mode="linear")
+    original_max_opacity = np.max(mask_img)
+    out_mask = mask_img <= 0.0
+    blurred_mask = image_blur(mask_img, 3.5 / softness, mode="linear")
     blurred_mask = np.maximum(blurred_mask - np.max(blurred_mask[out_mask]), 0.0)
-    np_rgba_image[
-        :, :, 3
-    ] *= blurred_mask  # preserve partial opacity in original input mask
-    np_rgba_image[:, :, 3] /= np.max(np_rgba_image[:, :, 3])  # renormalize
-    np_rgba_image[:, :, 3] = np.clip(
-        np_rgba_image[:, :, 3] - space, 0.0, 1.0
-    )  # make space
-    np_rgba_image[:, :, 3] /= np.max(np_rgba_image[:, :, 3])  # and renormalize again
-    np_rgba_image[:, :, 3] *= original_max_opacity  # restore original max opacity
-    return np_rgba_image
+    mask_img *= blurred_mask  # preserve partial opacity in original input mask
+    mask_img /= np.max(mask_img)  # renormalize
+    mask_img = np.clip(mask_img - space, 0.0, 1.0)  # make space
+    mask_img /= np.max(mask_img)  # and renormalize again
+    mask_img *= original_max_opacity  # restore original max opacity
+    return mask_img
 
 
 def expand_image(
     cv2_img, top: int, right: int, bottom: int, left: int, softness: float, space: float
 ):
+    assert cv2_img.shape[2] == 3
     origin_h, origin_w = cv2_img.shape[:2]
     new_width = cv2_img.shape[1] + left + right
     new_height = cv2_img.shape[0] + top + bottom
-    new_img = np.zeros((new_height, new_width, 4), np.uint8)  # expanded image is rgba
-    print(
-        "Expanding input image from {0}x{1} to {2}x{3}".format(
-            cv2_img.shape[1], cv2_img.shape[0], new_width, new_height
-        )
+    # TODO: which is better?
+    # new_img = np.random.randint(0, 255, (new_height, new_width, 3), np.uint8)
+    new_img = cv2.copyMakeBorder(
+        cv2_img, top, bottom, left, right, cv2.BORDER_REPLICATE
     )
 
-    if cv2_img.shape[2] == 3:  # rgb input image
-        new_img[
-            top : top + cv2_img.shape[0], left : left + cv2_img.shape[1], 0:3
-        ] = cv2_img
-        new_img[
-            top : top + cv2_img.shape[0], left : left + cv2_img.shape[1], 3
-        ] = 255  # fully opaque
-    elif cv2_img.shape[2] == 4:  # rgba input image
-        new_img[top : top + cv2_img.shape[0], left : left + cv2_img.shape[1]] = cv2_img
-    else:
-        raise Exception(
-            "Unsupported image format: {0} channels".format(cv2_img.shape[2])
-        )
+    mask_img = np.zeros((new_height, new_width), np.uint8)
+    mask_img[top : top + cv2_img.shape[0], left : left + cv2_img.shape[1]] = 255
 
     if softness > 0.0:
-        new_img = soften_mask(new_img / 255.0, softness / 100.0, space / 100.0)
-        new_img = (np.clip(new_img, 0.0, 1.0) * 255.0).astype(np.uint8)
+        mask_img = soften_mask(mask_img / 255.0, softness / 100.0, space / 100.0)
+        mask_img = (np.clip(mask_img, 0.0, 1.0) * 255.0).astype(np.uint8)
 
-    mask_image = 255.0 - new_img[:, :, 3]  # extract mask from alpha channel and invert
+    mask_image = 255.0 - mask_img  # extract mask from alpha channel and invert
     rgb_init_image = (
         0.0 + new_img[:, :, 0:3]
     )  # strip mask from init_img leaving only rgb channels
@@ -153,7 +138,7 @@ def expand_image(
     hard_mask[:, origin_w // 2 :] = 255
 
     hard_mask = cv2.copyMakeBorder(
-        hard_mask, top, bottom, left, right, cv2.BORDER_CONSTANT, value=255
+        hard_mask, top, bottom, left, right, cv2.BORDER_DEFAULT, value=255
     )
     mask_image = np.where(hard_mask > 0, mask_image, 0)
     return rgb_init_image.astype(np.uint8), mask_image.astype(np.uint8)
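
Below is a minimal usage sketch (not part of the patch) showing how the reworked expand_image could be exercised after applying this change. The input path, the 128 px padding, and the softness/space values are placeholder assumptions; only the function signature, the new 3-channel assert, and the two return values come from the diff above.

# Illustrative sketch only; "input.jpg" and all numeric values are placeholders.
import cv2

from lama_cleaner.model.g_diffuser_bot import expand_image

img = cv2.imread("input.jpg")  # 3-channel BGR, as the new assert requires
expanded, mask = expand_image(
    img, top=128, right=128, bottom=128, left=128, softness=20.0, space=1.0
)

# expanded: uint8 BGR image grown by the requested padding, with the border
#   filled via cv2.BORDER_REPLICATE instead of the old zero-filled RGBA canvas
# mask: single-channel uint8 mask, non-zero only inside the newly padded
#   region (the area the outpainting model should fill), per the patch
print(img.shape, expanded.shape, mask.shape)  # e.g. (512, 512, 3) (768, 768, 3) (768, 768)
cv2.imwrite("expanded.png", expanded)
cv2.imwrite("mask.png", mask)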