Merge pull request #208 from jiveabillion/main

UPDATE: Add Masking option
This commit is contained in:
Art Gourieff 2023-11-23 21:07:21 +07:00 committed by GitHub
commit 8b66464e6f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 568 additions and 8 deletions

147
scripts/entities/face.py Normal file
View File

@@ -0,0 +1,147 @@
import traceback

import cv2
import numpy as np
from modules import images
from PIL import Image

from scripts.entities.rect import Point, Rect


class Face:
    def __init__(self, entire_image: np.ndarray, face_area: Rect, face_margin: float, face_size: int, upscaler: str):
        self.face_area = face_area
        self.center = face_area.center
        left, top, right, bottom = face_area.to_square()

        self.left, self.top, self.right, self.bottom = self.__ensure_margin(
            left, top, right, bottom, entire_image, face_margin
        )

        self.width = self.right - self.left
        self.height = self.bottom - self.top

        self.image = self.__crop_face_image(entire_image, face_size, upscaler)
        self.face_size = face_size
        self.scale_factor = face_size / self.width
        self.face_area_on_image = self.__get_face_area_on_image()
        self.landmarks_on_image = self.__get_landmarks_on_image()

    def __get_face_area_on_image(self):
        # Remap the detector's box from whole-image coordinates into the resized crop.
        left = int((self.face_area.left - self.left) * self.scale_factor)
        top = int((self.face_area.top - self.top) * self.scale_factor)
        right = int((self.face_area.right - self.left) * self.scale_factor)
        bottom = int((self.face_area.bottom - self.top) * self.scale_factor)
        return self.__clip_values(left, top, right, bottom)

    def __get_landmarks_on_image(self):
        landmarks = []
        if self.face_area.landmarks is not None:
            for landmark in self.face_area.landmarks:
                landmarks.append(
                    Point(
                        int((landmark.x - self.left) * self.scale_factor),
                        int((landmark.y - self.top) * self.scale_factor),
                    )
                )
        return landmarks

    def __crop_face_image(self, entire_image: np.ndarray, face_size: int, upscaler: str):
        cropped = entire_image[self.top : self.bottom, self.left : self.right, :]
        if upscaler:
            return images.resize_image(0, Image.fromarray(cropped), face_size, face_size, upscaler)
        else:
            return Image.fromarray(cv2.resize(cropped, dsize=(face_size, face_size)))

    def __ensure_margin(self, left: int, top: int, right: int, bottom: int, entire_image: np.ndarray, margin: float):
        # Grow the square crop by `margin`, then shift it back inside the image bounds.
        entire_height, entire_width = entire_image.shape[:2]

        side_length = right - left
        margin = min(min(entire_height, entire_width) / side_length, margin)
        diff = int((side_length * margin - side_length) / 2)

        top = top - diff
        bottom = bottom + diff
        left = left - diff
        right = right + diff

        if top < 0:
            bottom = bottom - top
            top = 0
        if left < 0:
            right = right - left
            left = 0
        if bottom > entire_height:
            top = top - (bottom - entire_height)
            bottom = entire_height
        if right > entire_width:
            left = left - (right - entire_width)
            right = entire_width

        return left, top, right, bottom

    def get_angle(self) -> float:
        # Angle of the eye line in degrees; 0 if landmarks are unavailable.
        landmarks = getattr(self.face_area, "landmarks", None)
        if landmarks is None:
            return 0

        eye1 = getattr(landmarks, "eye1", None)
        eye2 = getattr(landmarks, "eye2", None)
        if eye2 is None or eye1 is None:
            return 0

        try:
            dx = eye2.x - eye1.x
            dy = eye2.y - eye1.y
            if dx == 0:
                dx = 1
            angle = np.arctan(dy / dx) * 180 / np.pi
            if dx < 0:
                angle = (angle + 180) % 360
            return angle
        except Exception:
            print(traceback.format_exc())
            return 0

    def rotate_face_area_on_image(self, angle: float):
        center = [
            (self.face_area_on_image[0] + self.face_area_on_image[2]) / 2,
            (self.face_area_on_image[1] + self.face_area_on_image[3]) / 2,
        ]
        points = [
            [self.face_area_on_image[0], self.face_area_on_image[1]],
            [self.face_area_on_image[2], self.face_area_on_image[3]],
        ]

        angle = np.radians(angle)
        rot_matrix = np.array([[np.cos(angle), -np.sin(angle)], [np.sin(angle), np.cos(angle)]])
        points = np.array(points) - center
        points = np.dot(points, rot_matrix.T)
        points += center

        left, top, right, bottom = (int(points[0][0]), int(points[0][1]), int(points[1][0]), int(points[1][1]))
        left, right = (right, left) if left > right else (left, right)
        top, bottom = (bottom, top) if top > bottom else (top, bottom)

        # Re-square the rotated box so downstream crops stay square.
        width, height = right - left, bottom - top
        if width < height:
            left, right = left - (height - width) // 2, right + (height - width) // 2
        elif height < width:
            top, bottom = top - (width - height) // 2, bottom + (width - height) // 2
        return self.__clip_values(left, top, right, bottom)

    def __clip_values(self, *args):
        # Clamp coordinates to the [0, face_size] range of the crop.
        result = []
        for val in args:
            if val < 0:
                result.append(0)
            elif val > self.face_size:
                result.append(self.face_size)
            else:
                result.append(val)
        return tuple(result)
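
A minimal usage sketch of the Face helper (hypothetical values; assumes the webui environment so that modules.images is importable):

import numpy as np
from scripts.entities.face import Face
from scripts.entities.rect import Rect

entire_image = np.zeros((1024, 768, 3), dtype=np.uint8)  # stand-in for a real photo
face_box = Rect(300, 200, 500, 400)                      # hypothetical detector output
face = Face(entire_image, face_box, face_margin=1.6, face_size=512, upscaler="")

print(face.image.size)          # (512, 512): the crop is resized to face_size
print(face.face_area_on_image)  # (96, 96, 416, 416): the box remapped into the crop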

78
scripts/entities/rect.py Normal file
View File

@@ -0,0 +1,78 @@
from typing import Dict, NamedTuple, Optional, Tuple

import numpy as np


class Point(NamedTuple):
    x: int
    y: int


class Landmarks(NamedTuple):
    eye1: Point
    eye2: Point
    nose: Point
    mouth1: Point
    mouth2: Point


class Rect:
    def __init__(
        self,
        left: int,
        top: int,
        right: int,
        bottom: int,
        tag: str = "face",
        landmarks: Optional[Landmarks] = None,
        attributes: Optional[Dict[str, str]] = None,
    ) -> None:
        self.tag = tag
        self.left = left
        self.top = top
        self.right = right
        self.bottom = bottom
        self.center = int((right + left) / 2)
        self.middle = int((top + bottom) / 2)
        self.width = right - left
        self.height = bottom - top
        self.size = self.width * self.height
        self.landmarks = landmarks
        # Avoid a shared mutable default dict between instances.
        self.attributes = attributes if attributes is not None else {}

    @classmethod
    def from_ndarray(
        cls,
        face_box: np.ndarray,
        tag: str = "face",
        landmarks: Optional[Landmarks] = None,
        attributes: Optional[Dict[str, str]] = None,
    ) -> "Rect":
        # Truncate to ints; trailing values (e.g. a detection score) are ignored.
        left, top, right, bottom, *_ = list(map(int, face_box))
        return cls(left, top, right, bottom, tag, landmarks, attributes)

    def to_tuple(self) -> Tuple[int, int, int, int]:
        return self.left, self.top, self.right, self.bottom

    def to_square(self):
        left, top, right, bottom = self.to_tuple()

        # Pad odd sides to even lengths so the square can be centered exactly.
        width = right - left
        height = bottom - top

        if width % 2 == 1:
            right = right + 1
            width = width + 1
        if height % 2 == 1:
            bottom = bottom + 1
            height = height + 1

        diff = int(abs(width - height) / 2)
        if width > height:
            top = top - diff
            bottom = bottom + diff
        else:
            left = left - diff
            right = right + diff

        return left, top, right, bottom
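
A quick sketch of Rect.from_ndarray and to_square (hypothetical box values):

import numpy as np
from scripts.entities.rect import Rect

box = np.array([102.7, 54.2, 311.9, 340.5, 0.98])  # trailing score is ignored
rect = Rect.from_ndarray(box)

print(rect.to_tuple())   # (102, 54, 311, 340)
print(rect.to_square())  # (64, 54, 350, 340): even-padded, then squared on the shorter axis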

88
scripts/inferencers/bisenet_mask_generator.py Normal file
View File

@@ -0,0 +1,88 @@
from typing import List, Tuple

import cv2
import modules.shared as shared
import numpy as np
import torch
from facexlib.parsing import init_parsing_model
from facexlib.utils.misc import img2tensor
from torchvision.transforms.functional import normalize
from PIL import Image

from scripts.inferencers.mask_generator import MaskGenerator
from scripts.reactor_logger import logger


class BiSeNetMaskGenerator(MaskGenerator):
    def __init__(self) -> None:
        self.mask_model = init_parsing_model(device=shared.device)

    def name(self):
        return "BiSeNet"

    def generate_mask(
        self,
        face_image: np.ndarray,
        face_area_on_image: Tuple[int, int, int, int],
        affected_areas: List[str],
        mask_size: int,
        use_minimal_area: bool,
        fallback_ratio: float = 0.25,
        **kwargs,
    ) -> np.ndarray:
        original_face_image = face_image
        face_image = face_image.copy()
        face_image = face_image[:, :, ::-1]  # RGB -> BGR

        if use_minimal_area:
            face_image = MaskGenerator.mask_non_face_areas(face_image, face_area_on_image)

        h, w, _ = face_image.shape
        if w != 512 or h != 512:
            # Normalize each side to 512, rounded down to a multiple of 8.
            rw = (int(w * (512 / w)) // 8) * 8
            rh = (int(h * (512 / h)) // 8) * 8
            face_image = cv2.resize(face_image, dsize=(rw, rh))

        face_tensor = img2tensor(face_image.astype("float32") / 255.0, float32=True)
        normalize(face_tensor, (0.5, 0.5, 0.5), (0.5, 0.5, 0.5), inplace=True)
        face_tensor = torch.unsqueeze(face_tensor, 0).to(shared.device)

        with torch.no_grad():
            face = self.mask_model(face_tensor)[0]
        face = face.squeeze(0).cpu().numpy().argmax(0)
        face = face.copy().astype(np.uint8)

        mask = self.__to_mask(face, affected_areas)
        if mask_size > 0:
            mask = cv2.dilate(mask, np.ones((5, 5), np.uint8), iterations=mask_size)

        if w != 512 or h != 512:
            mask = cv2.resize(mask, dsize=(w, h))

        # Fallback generator is currently disabled:
        """if MaskGenerator.calculate_mask_coverage(mask) < fallback_ratio:
            logger.info("Use fallback mask generator")
            mask = self.fallback_mask_generator.generate_mask(
                original_face_image, face_area_on_image, use_minimal_area=True
            )"""

        return mask

    def __to_mask(self, face: np.ndarray, affected_areas: List[str]) -> np.ndarray:
        # BiSeNet parsing labels: 1-13 cover facial regions, 14 = neck, 17 = hair, 18 = hat.
        keep_face = "Face" in affected_areas
        keep_neck = "Neck" in affected_areas
        keep_hair = "Hair" in affected_areas
        keep_hat = "Hat" in affected_areas

        mask = np.zeros((face.shape[0], face.shape[1], 3), dtype=np.uint8)
        num_of_class = np.max(face)
        for i in range(1, num_of_class + 1):
            index = np.where(face == i)
            if i < 14 and keep_face:
                mask[index[0], index[1], :] = [255, 255, 255]
            elif i == 14 and keep_neck:
                mask[index[0], index[1], :] = [255, 255, 255]
            elif i == 17 and keep_hair:
                mask[index[0], index[1], :] = [255, 255, 255]
            elif i == 18 and keep_hat:
                mask[index[0], index[1], :] = [255, 255, 255]
        return mask
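
A hedged usage sketch of the generator (assumes the webui environment and that the facexlib parsing weights can be loaded; the zero image is a stand-in for a real 512x512 RGB face crop):

import numpy as np
from scripts.inferencers.bisenet_mask_generator import BiSeNetMaskGenerator

generator = BiSeNetMaskGenerator()  # loads the facexlib BiSeNet parsing model
face_crop = np.zeros((512, 512, 3), dtype=np.uint8)

mask = generator.generate_mask(
    face_crop,
    face_area_on_image=(96, 96, 416, 416),
    affected_areas=["Face", "Hair"],
    mask_size=2,            # two 5x5 dilation passes
    use_minimal_area=True,  # black out pixels outside the face box first
)
print(mask.shape)  # (512, 512, 3): white where the selected classes were predicted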

37
scripts/inferencers/mask_generator.py Normal file
View File

@@ -0,0 +1,37 @@
from abc import ABC, abstractmethod
from typing import Tuple

import cv2
import numpy as np


class MaskGenerator(ABC):
    @abstractmethod
    def name(self) -> str:
        pass

    @abstractmethod
    def generate_mask(
        self,
        face_image: np.ndarray,
        face_area_on_image: Tuple[int, int, int, int],
        **kwargs,
    ) -> np.ndarray:
        pass

    @staticmethod
    def mask_non_face_areas(image: np.ndarray, face_area_on_image: Tuple[int, int, int, int]) -> np.ndarray:
        # Zero out every pixel outside the (left, top, right, bottom) box.
        left, top, right, bottom = face_area_on_image
        image = image.copy()
        image[:top, :] = 0
        image[bottom:, :] = 0
        image[:, :left] = 0
        image[:, right:] = 0
        return image

    @staticmethod
    def calculate_mask_coverage(mask: np.ndarray):
        # Fraction of pixels that are non-black after grayscale conversion.
        gray_mask = cv2.cvtColor(mask, cv2.COLOR_RGB2GRAY)
        non_black_pixels = np.count_nonzero(gray_mask)
        total_pixels = gray_mask.size
        return non_black_pixels / total_pixels
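
The two static helpers can be exercised standalone; a small sketch with a synthetic mask:

import numpy as np
from scripts.inferencers.mask_generator import MaskGenerator

mask = np.zeros((100, 100, 3), dtype=np.uint8)
mask[25:75, 25:75] = 255  # a 50x50 white square

print(MaskGenerator.calculate_mask_coverage(mask))  # 0.25 (2500 of 10000 pixels)

trimmed = MaskGenerator.mask_non_face_areas(mask, (40, 40, 60, 60))
print(MaskGenerator.calculate_mask_coverage(trimmed))  # 0.04 (only the 20x20 box remains)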

View File

@@ -66,6 +66,8 @@ class FaceSwapScript(scripts.Script):
img = gr.Image(type="pil")
enable = gr.Checkbox(False, label="Enable", info=f"The Fast and Simple FaceSwap Extension - {version_flag}")
save_original = gr.Checkbox(False, label="Save Original", info="Save the original image(s) made before swapping; If you use \"img2img\" - this option will affect with \"Swap in generated\" only")
mask_face = gr.Checkbox(False, label="Mask Faces", info="Attempt to mask only the faces and eliminate pixelation of the image around the contours.")
gr.Markdown("<br>")
gr.Markdown("Source Image (above):")
with gr.Row():
@@ -211,6 +213,7 @@ class FaceSwapScript(scripts.Script):
source_hash_check,
target_hash_check,
device,
mask_face
]
@@ -264,6 +267,7 @@ class FaceSwapScript(scripts.Script):
source_hash_check,
target_hash_check,
device,
mask_face
):
self.enable = enable
if self.enable:
@@ -291,6 +295,7 @@ class FaceSwapScript(scripts.Script):
self.source_hash_check = source_hash_check
self.target_hash_check = target_hash_check
self.device = device
self.mask_face = mask_face
if self.gender_source is None or self.gender_source == "No":
self.gender_source = 0
if self.gender_target is None or self.gender_target == "No":
@@ -334,6 +339,7 @@ class FaceSwapScript(scripts.Script):
source_hash_check=self.source_hash_check,
target_hash_check=self.target_hash_check,
device=self.device,
mask_face=mask_face
)
p.init_images[i] = result
# result_path = get_image_path(p.init_images[i], p.outpath_samples, "", p.all_seeds[i], p.all_prompts[i], "txt", p=p, suffix="-swapped")
@@ -385,6 +391,7 @@ class FaceSwapScript(scripts.Script):
source_hash_check=self.source_hash_check,
target_hash_check=self.target_hash_check,
device=self.device,
mask_face=self.mask_face
)
if result is not None and swapped > 0:
result_images.append(result)
@@ -442,6 +449,7 @@ class FaceSwapScript(scripts.Script):
source_hash_check=self.source_hash_check,
target_hash_check=self.target_hash_check,
device=self.device,
mask_face=self.mask_face
)
try:
pp = scripts_postprocessing.PostprocessedImage(result)
@@ -468,6 +476,8 @@ class FaceSwapScriptExtras(scripts_postprocessing.ScriptPostprocessing):
with gr.Column():
img = gr.Image(type="pil")
enable = gr.Checkbox(False, label="Enable", info=f"The Fast and Simple FaceSwap Extension - {version_flag}")
mask_face = gr.Checkbox(False, label="Mask Faces", info="Attempt to mask only the faces and eliminate pixelation of the image around the contours.")
gr.Markdown("Source Image (above):")
with gr.Row():
source_faces_index = gr.Textbox(
@@ -582,6 +592,7 @@ class FaceSwapScriptExtras(scripts_postprocessing.ScriptPostprocessing):
'gender_target': gender_target,
'codeformer_weight': codeformer_weight,
'device': device,
'mask_face': mask_face
}
return args
@@ -631,6 +642,7 @@ class FaceSwapScriptExtras(scripts_postprocessing.ScriptPostprocessing):
self.gender_target = args['gender_target']
self.codeformer_weight = args['codeformer_weight']
self.device = args['device']
self.mask_face = args['mask_face']
if self.gender_source is None or self.gender_source == "No":
self.gender_source = 0
if self.gender_target is None or self.gender_target == "No":
@@ -669,6 +681,7 @@ class FaceSwapScriptExtras(scripts_postprocessing.ScriptPostprocessing):
source_hash_check=True,
target_hash_check=True,
device=self.device,
mask_face=self.mask_face
)
try:
pp.info["ReActor"] = True

View File

@@ -5,10 +5,13 @@ from typing import List, Union
import cv2
import numpy as np
from numpy import uint8
from PIL import Image, ImageDraw
from scripts.inferencers.bisenet_mask_generator import BiSeNetMaskGenerator
from scripts.entities.face import Face
from scripts.entities.rect import Rect
import insightface
from torchvision.transforms.functional import to_pil_image
from scripts.reactor_helpers import get_image_md5hash, get_Device
from modules.face_restoration import FaceRestoration
try: # A1111
@@ -18,6 +21,7 @@ except: # SD.Next
from modules.upscaler import UpscalerData
from modules.shared import state
from scripts.reactor_logger import logger

try:
from modules.paths_internal import models_path
except:
@@ -76,8 +80,9 @@ def check_process_halt(msgforced: bool = False):
FS_MODEL = None
MASK_MODEL = None
CURRENT_FS_MODEL_PATH = None
CURRENT_MASK_MODEL_PATH = None
ANALYSIS_MODEL = None
SOURCE_FACES = None
@@ -105,6 +110,8 @@ def getFaceSwapModel(model_path: str):
return FS_MODEL

def restore_face(image: Image, enhancement_options: EnhancementOptions):
result_image = image
@@ -168,6 +175,27 @@ def enhance_image(image: Image, enhancement_options: EnhancementOptions):
result_image = restore_face(result_image, enhancement_options)
return result_image

def enhance_image_and_mask(image: Image.Image, enhancement_options: EnhancementOptions, target_img_orig: Image.Image, entire_mask_image: Image.Image) -> Image.Image:
    result_image = image
    if check_process_halt(msgforced=True):
        return result_image
    if enhancement_options.do_restore_first:
        result_image = restore_face(result_image, enhancement_options)
        result_image = Image.composite(result_image, target_img_orig, entire_mask_image)
        result_image = upscale_image(result_image, enhancement_options)
    else:
        result_image = upscale_image(result_image, enhancement_options)
        entire_mask_image = Image.fromarray(cv2.resize(np.array(entire_mask_image), result_image.size, interpolation=cv2.INTER_AREA)).convert("L")
        result_image = Image.composite(result_image, target_img_orig, entire_mask_image)
        result_image = restore_face(result_image, enhancement_options)
    return result_image

def get_gender(face, face_index):
gender = [
@@ -282,6 +310,7 @@ def swap_face(
source_hash_check: bool = True,
target_hash_check: bool = False,
device: str = "CPU",
mask_face: bool = False
):
global SOURCE_FACES, SOURCE_IMAGE_HASH, TARGET_FACES, TARGET_IMAGE_HASH, PROVIDERS
result_image = target_img
@@ -308,7 +337,8 @@ def swap_face(
source_img = cv2.cvtColor(np.array(source_img), cv2.COLOR_RGB2BGR)
target_img = cv2.cvtColor(np.array(target_img), cv2.COLOR_RGB2BGR)
target_img_orig = cv2.cvtColor(np.array(target_img), cv2.COLOR_RGB2BGR)
entire_mask_image = np.zeros_like(np.array(target_img))
output: List = []
output_info: str = ""
swapped = 0
@@ -411,7 +441,12 @@ def swap_face(
if target_face is not None and wrong_gender == 0:
    logger.status("Swapping Source into Target")
    swapped_image = face_swapper.get(result, target_face, source_face)
    if mask_face:
        result = apply_face_mask(swapped_image=swapped_image, target_image=result, target_face=target_face, entire_mask_image=entire_mask_image)
    else:
        result = swapped_image
    swapped += 1
elif wrong_gender == 1:
@@ -445,7 +480,12 @@ def swap_face(
result_image = Image.fromarray(cv2.cvtColor(result, cv2.COLOR_BGR2RGB))
if enhancement_options is not None and swapped > 0:
    if mask_face and entire_mask_image is not None:
        result_image = enhance_image_and_mask(result_image, enhancement_options, Image.fromarray(target_img_orig), Image.fromarray(entire_mask_image).convert("L"))
    else:
        result_image = enhance_image(result_image, enhancement_options)
elif mask_face and entire_mask_image is not None and swapped > 0:
    result_image = Image.composite(result_image, Image.fromarray(target_img_orig), Image.fromarray(entire_mask_image).convert("L"))
else:
logger.status("No source face(s) in the provided Index")
@@ -453,3 +493,160 @@ def swap_face(
logger.status("No source face(s) found")
return result_image, output, swapped

def apply_face_mask(swapped_image: np.ndarray, target_image: np.ndarray, target_face, entire_mask_image: np.ndarray) -> np.ndarray:
    logger.status("Masking Face")
    mask_generator = BiSeNetMaskGenerator()
    # Crop a 512px chip around the target face with a 1.6x margin, then parse it.
    face = Face(target_image, Rect.from_ndarray(np.array(target_face.bbox)), 1.6, 512, "")
    face_image = np.array(face.image)
    process_face_image(face)
    face_area_on_image = face.face_area_on_image
    mask = mask_generator.generate_mask(
        face_image,
        face_area_on_image=face_area_on_image,
        affected_areas=["Face"],
        mask_size=0,
        use_minimal_area=True,
    )
    mask = cv2.blur(mask, (12, 12))  # feather the mask edges
    """entire_mask_image = np.zeros_like(target_image)"""
    larger_mask = cv2.resize(mask, dsize=(face.width, face.height))
    entire_mask_image[
        face.top : face.bottom,
        face.left : face.right,
    ] = larger_mask
    result = Image.composite(Image.fromarray(swapped_image), Image.fromarray(target_image), Image.fromarray(entire_mask_image).convert("L"))
    return np.array(result)
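
The final composite takes swapped pixels only where the mask is white; a standalone illustration of that Image.composite behavior with synthetic arrays:

import numpy as np
from PIL import Image

swapped = np.full((64, 64, 3), 200, dtype=np.uint8)  # stand-in for the swapped frame
original = np.zeros((64, 64, 3), dtype=np.uint8)     # stand-in for the untouched frame
mask = np.zeros((64, 64, 3), dtype=np.uint8)
mask[16:48, 16:48] = 255                             # white = take pixels from swapped

result = Image.composite(Image.fromarray(swapped), Image.fromarray(original), Image.fromarray(mask).convert("L"))
print(np.array(result)[32, 32], np.array(result)[0, 0])  # [200 200 200] [0 0 0]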

def correct_face_tilt(angle: float) -> bool:
    # Treat a tilt of more than 40 degrees (normalized to [0, 180]) as needing correction.
    angle = abs(angle)
    if angle > 180:
        angle = 360 - angle
    return angle > 40


def _dilate(arr: np.ndarray, value: int) -> np.ndarray:
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (value, value))
    return cv2.dilate(arr, kernel, iterations=1)


def _erode(arr: np.ndarray, value: int) -> np.ndarray:
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (value, value))
    return cv2.erode(arr, kernel, iterations=1)


colors = [
    (255, 0, 0),
    (0, 255, 0),
    (0, 0, 255),
    (255, 255, 0),
    (255, 0, 255),
    (0, 255, 255),
    (255, 255, 255),
    (128, 0, 0),
    (0, 128, 0),
    (128, 128, 0),
    (0, 0, 128),
    (0, 128, 128),
]


def color_generator(colors):
    # Endless round-robin over the palette above.
    while True:
        for color in colors:
            yield color


color_iter = color_generator(colors)


def process_face_image(
    face: Face,
    **kwargs,
) -> Image:
    # Debug visualization: tint the crop, outline the face box, and dot the landmarks.
    image = np.array(face.image)
    overlay = image.copy()
    cv2.rectangle(overlay, (0, 0), (image.shape[1], image.shape[0]), next(color_iter), -1)
    l, t, r, b = face.face_area_on_image
    cv2.rectangle(overlay, (l, t), (r, b), (0, 0, 0), 10)
    if face.landmarks_on_image is not None:
        for landmark in face.landmarks_on_image:
            cv2.circle(overlay, (int(landmark.x), int(landmark.y)), 6, (0, 0, 0), 10)
    alpha = 0.3
    output = cv2.addWeighted(image, 1 - alpha, overlay, alpha, 0)
    return Image.fromarray(output)

def dilate_erode(img: Image.Image, value: int) -> Image.Image:
    """
    The dilate_erode function takes an image and a value.
    If the value is positive, it dilates the image by that amount.
    If the value is negative, it erodes the image by that amount.

    Parameters
    ----------
    img: PIL.Image.Image
        the image to be processed
    value: int
        kernel size of dilation or erosion

    Returns
    -------
    PIL.Image.Image
        The image that has been dilated or eroded
    """
    if value == 0:
        return img

    arr = np.array(img)
    arr = _dilate(arr, value) if value > 0 else _erode(arr, -value)

    return Image.fromarray(arr)
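
A usage sketch for dilate_erode (synthetic binary mask; assumes the function above is in scope):

import numpy as np
from PIL import Image

binary = np.zeros((32, 32), dtype=np.uint8)
binary[12:20, 12:20] = 255  # an 8x8 blob
img = Image.fromarray(binary)

grown = dilate_erode(img, 5)    # positive: 5x5 dilation, blob grows to 12x12
shrunk = dilate_erode(img, -5)  # negative: 5x5 erosion, blob shrinks to 4x4
print(np.count_nonzero(np.array(grown)), np.count_nonzero(np.array(shrunk)))  # 144 16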

def mask_to_pil(masks, shape: tuple[int, int]) -> list[Image.Image]:
    """
    Parameters
    ----------
    masks: torch.Tensor, dtype=torch.float32, shape=(N, H, W).
        The device can be CUDA, but `to_pil_image` takes care of that.
    shape: tuple[int, int]
        (width, height) of the original image
    """
    n = masks.shape[0]
    return [to_pil_image(masks[i], mode="L").resize(shape) for i in range(n)]

def create_mask_from_bbox(
    bboxes: list[list[float]], shape: tuple[int, int]
) -> list[Image.Image]:
    """
    Parameters
    ----------
    bboxes: list[list[float]]
        list of [x1, y1, x2, y2] bounding boxes
    shape: tuple[int, int]
        shape of the image (width, height)

    Returns
    -------
    masks: list[Image.Image]
        A list of masks
    """
    masks = []
    for bbox in bboxes:
        mask = Image.new("L", shape, 0)
        mask_draw = ImageDraw.Draw(mask)
        mask_draw.rectangle(bbox, fill=255)
        masks.append(mask)
    return masks
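
And a short sketch for create_mask_from_bbox (hypothetical boxes; note that shape is (width, height), assuming the function above is in scope):

bboxes = [[10.0, 10.0, 40.0, 40.0], [50.0, 50.0, 90.0, 80.0]]
masks = create_mask_from_bbox(bboxes, (100, 100))
print(len(masks), masks[0].size)  # 2 (100, 100)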

def rotate_image(image: Image, angle: float) -> Image:
    if angle == 0:
        return image
    return Image.fromarray(rotate_array(np.array(image), angle))


def rotate_array(image: np.ndarray, angle: float) -> np.ndarray:
    if angle == 0:
        return image

    h, w = image.shape[:2]
    center = (w // 2, h // 2)

    # Rotate about the center without changing the canvas size.
    M = cv2.getRotationMatrix2D(center, angle, 1.0)
    return cv2.warpAffine(image, M, (w, h))
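
Finally, a sketch of rotate_array (synthetic frame; the canvas size is preserved, so corners may be clipped, assuming the function above is in scope):

import numpy as np

frame = np.zeros((100, 200, 3), dtype=np.uint8)
leveled = rotate_array(frame, 12.5)  # rotate 12.5 degrees about the center
print(leveled.shape)                 # (100, 200, 3)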