mirror of
https://github.com/microsoft/TRELLIS.2.git
synced 2026-04-04 03:27:08 -04:00
374 lines
13 KiB
Python
374 lines
13 KiB
Python
import re
|
|
import numpy as np
|
|
import cv2
|
|
import torch
|
|
import contextlib
|
|
|
|
|
|
# Dictionary utils
|
|
def _dict_merge(dicta, dictb, prefix=''):
    """
    Recursively merge two dictionaries into a new dictionary.

    Keys present in only one input are copied over as-is. Keys present in
    both inputs are merged recursively when both values are dictionaries;
    any other collision is an error.

    Parameters:
        dicta (dict): First dictionary.
        dictb (dict): Second dictionary.
        prefix (str): Dotted path of the enclosing keys, used only to build
            the error message for a colliding key.

    Returns:
        dict: A new dictionary holding the merged content.

    Raises:
        ValueError: If a key exists in both inputs and at least one of the
            two values is not a dictionary.
    """
    assert isinstance(dicta, dict), 'input must be a dictionary'
    assert isinstance(dictb, dict), 'input must be a dictionary'
    merged = {}
    for key in set(dicta) | set(dictb):
        if key not in dictb:
            # Only the first dictionary has this key.
            merged[key] = dicta[key]
            continue
        if key not in dicta:
            # Only the second dictionary has this key.
            merged[key] = dictb[key]
            continue
        # Key collision: only two sub-dictionaries can be reconciled.
        if not (isinstance(dicta[key], dict) and isinstance(dictb[key], dict)):
            raise ValueError(f'Duplicate key {prefix}.{key} found in both dictionaries. Types: {type(dicta[key])}, {type(dictb[key])}')
        merged[key] = _dict_merge(dicta[key], dictb[key], prefix=f'{prefix}.{key}')
    return merged


def dict_merge(dicta, dictb):
    """
    Merge two (possibly nested) dictionaries into a new dictionary.

    Thin public wrapper around `_dict_merge` that starts with an empty key
    path. Raises ValueError when the same key maps to non-dictionary values
    in both inputs.
    """
    return _dict_merge(dicta, dictb, prefix='')
|
|
|
|
|
|
def dict_foreach(dic, func, special_func={}):
    """
    Recursively apply a function to all non-dictionary leaf values in a dictionary.

    The dictionary is modified in place and also returned.

    Parameters:
        dic (dict): The (possibly nested) dictionary to transform.
        func (callable): Applied to every leaf value whose key has no entry
            in `special_func`.
        special_func (dict): Maps a leaf key to the callable that should be
            used instead of `func` for that key, at any nesting depth.
            The default is never mutated.

    Returns:
        dict: The same `dic` object, with every leaf replaced.
    """
    assert isinstance(dic, dict), 'input must be a dictionary'
    for key in dic.keys():
        if isinstance(dic[key], dict):
            # Forward special_func so per-key overrides also reach nested
            # leaves (previously they were silently dropped on recursion).
            dic[key] = dict_foreach(dic[key], func, special_func)
        elif key in special_func:
            dic[key] = special_func[key](dic[key])
        else:
            dic[key] = func(dic[key])
    return dic
|
|
|
|
|
|
def dict_reduce(dicts, func, special_func={}):
    """
    Reduce a list of (possibly nested) dictionaries into a single dictionary.

    For every key found in any input, the values from the dictionaries that
    contain that key are gathered into a list. If the first gathered value
    is a dictionary the list is reduced recursively; otherwise the list is
    passed to a reducer.

    Parameters:
        dicts (list): Non-empty list of dictionaries.
        func (callable): Default reducer; receives the list of values.
        special_func (dict): Maps a leaf key to the reducer that replaces
            `func` for that key.

    Returns:
        dict: A new dictionary with one reduced value per key.
    """
    assert isinstance(dicts, list), 'input must be a list of dictionaries'
    assert all([isinstance(d, dict) for d in dicts]), 'input must be a list of dictionaries'
    assert len(dicts) > 0, 'input must be a non-empty list of dictionaries'
    keys = {key for entry in dicts for key in entry}
    result = {}
    for key in keys:
        # Dictionaries lacking this key simply do not contribute a value.
        values = [entry[key] for entry in dicts if key in entry]
        if isinstance(values[0], dict):
            result[key] = dict_reduce(values, func, special_func)
            continue
        reducer = special_func[key] if key in special_func else func
        result[key] = reducer(values)
    return result
|
|
|
|
|
|
def dict_any(dic, func):
    """
    Return True if a predicate holds for at least one leaf of a nested dictionary.

    Nested dictionaries are searched recursively. Evaluation stops at the
    first leaf for which `func` returns a truthy value. An empty dictionary
    yields False.
    """
    assert isinstance(dic, dict), 'input must be a dictionary'
    return any(
        dict_any(value, func) if isinstance(value, dict) else func(value)
        for value in dic.values()
    )
|
|
|
|
|
|
def dict_all(dic, func):
    """
    Return True if a predicate holds for every leaf of a nested dictionary.

    Nested dictionaries are checked recursively. Evaluation stops at the
    first leaf for which `func` returns a falsy value. An empty dictionary
    yields True.
    """
    assert isinstance(dic, dict), 'input must be a dictionary'
    return all(
        dict_all(value, func) if isinstance(value, dict) else func(value)
        for value in dic.values()
    )
|
|
|
|
|
|
def dict_flatten(dic, sep='.'):
    """
    Flatten a nested dictionary into a single-level dictionary.

    Keys of nested entries are joined to their parent keys with `sep` after
    being converted to strings. Top-level leaves keep their original key
    object unchanged.

    Parameters:
        dic (dict): The (possibly nested) dictionary to flatten.
        sep (str): Separator placed between parent and child keys.

    Returns:
        dict: A new dictionary containing no nested dictionaries.
    """
    assert isinstance(dic, dict), 'input must be a dictionary'
    flat = {}
    for key, value in dic.items():
        if not isinstance(value, dict):
            flat[key] = value
            continue
        # Flatten the child first, then prefix each of its keys with ours.
        for child_key, child_value in dict_flatten(value, sep=sep).items():
            flat[str(key) + sep + str(child_key)] = child_value
    return flat
|
|
|
|
|
|
# Context utils
|
|
@contextlib.contextmanager
def nested_contexts(*contexts):
    """
    Enter several context managers as one combined context.

    Each positional argument is a zero-argument callable that returns a
    context manager. The callables are invoked and entered in the given
    order; the ExitStack unwinds them in reverse order on exit.
    """
    with contextlib.ExitStack() as exit_stack:
        for make_context in contexts:
            exit_stack.enter_context(make_context())
        yield
|
|
|
|
|
|
# Image utils
|
|
def make_grid(images, nrow=None, ncol=None, aspect_ratio=None):
    """
    Tile equally sized images into a single grid image, in row-major order.

    Parameters:
        images (sequence of numpy.ndarray): Non-empty sequence of 2-D (H, W)
            or 3-D (H, W, C) arrays. The grid cell size and dtype are taken
            from the first image.
        nrow (int, optional): Number of grid rows.
        ncol (int, optional): Number of grid columns.
        aspect_ratio (float, optional): Target columns-to-rows ratio. Only
            used when both `nrow` and `ncol` are None.

    Returns:
        numpy.ndarray: The grid image; cells without an image stay zero.

    Raises:
        ValueError: If `images` is empty.
        AssertionError: If both `nrow` and `ncol` are given but the grid is
            too small to hold all images.
    """
    num_images = len(images)
    if num_images == 0:
        raise ValueError('images must be a non-empty sequence')

    if nrow is None and ncol is None:
        if aspect_ratio is not None:
            nrow = int(np.round(np.sqrt(num_images / aspect_ratio)))
        else:
            nrow = int(np.sqrt(num_images))
        # A large aspect ratio can round the row count down to 0, which
        # would make the ceil-division below divide by zero.
        nrow = max(nrow, 1)
        ncol = (num_images + nrow - 1) // nrow
    elif nrow is None and ncol is not None:
        nrow = (num_images + ncol - 1) // ncol
    elif nrow is not None and ncol is None:
        ncol = (num_images + nrow - 1) // nrow
    else:
        assert nrow * ncol >= num_images, 'nrow * ncol must be greater than or equal to the number of images'

    first = images[0]
    cell_h, cell_w = first.shape[0], first.shape[1]
    if first.ndim == 2:
        grid = np.zeros((nrow * cell_h, ncol * cell_w), dtype=first.dtype)
    else:
        grid = np.zeros((nrow * cell_h, ncol * cell_w, first.shape[2]), dtype=first.dtype)
    for i, img in enumerate(images):
        row, col = divmod(i, ncol)
        grid[row * img.shape[0]:(row + 1) * img.shape[0], col * img.shape[1]:(col + 1) * img.shape[1]] = img
    return grid
|
|
|
|
|
|
def notes_on_image(img, notes=None):
    """
    Append a 32-pixel black strip below an image and optionally write text in it.

    Parameters:
        img (numpy.ndarray): 3-D image array; the padding spec requires
            exactly three axes. Treated as RGB for the colour conversions.
        notes (str, optional): Text drawn in white near the bottom of the
            strip. Nothing is drawn when None.

    Returns:
        numpy.ndarray: The padded image, converted back to RGB order.
    """
    padded = np.pad(img, ((0, 32), (0, 0), (0, 0)), 'constant', constant_values=0)
    canvas = cv2.cvtColor(padded, cv2.COLOR_RGB2BGR)
    if notes is not None:
        # putText anchors the text at its bottom-left corner.
        anchor = (0, canvas.shape[0] - 4)
        canvas = cv2.putText(canvas, notes, anchor, cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 1)
    return cv2.cvtColor(canvas, cv2.COLOR_BGR2RGB)
|
|
|
|
|
|
|
|
def text_image(text, resolution=(512, 512), max_size=0.5, h_align="left", v_align="center"):
    """
    Draw text on an image of the given resolution. The text is automatically wrapped
    and scaled so that it fits completely within the image while preserving any explicit
    line breaks (including blank lines) and original spacing. Horizontal and vertical
    alignment can be controlled via flags.

    Parameters:
        text (str): The input text. Newline characters and spacing are preserved.
        resolution (tuple): The image resolution as (width, height).
        max_size (float): The maximum font scale (upper bound of the scale search).
        h_align (str): Horizontal alignment. Options: "left", "center", "right".
            Any other value falls back to "left".
        v_align (str): Vertical alignment. Options: "top", "center", "bottom".
            Any other value falls back to "center".

    Returns:
        numpy.ndarray: A (height, width, 3) uint8 image with black text on a white
        background (so the result is identical in RGB and BGR channel order).
    """
    width, height = resolution
    # Create a white background image
    img = np.full((height, width, 3), 255, dtype=np.uint8)

    # Set margins and compute available drawing area
    margin = 10
    avail_width = width - 2 * margin
    avail_height = height - 2 * margin

    # Choose OpenCV font and text thickness
    font = cv2.FONT_HERSHEY_SIMPLEX
    thickness = 1
    # Ratio for additional spacing between lines (relative to the height of "A")
    line_spacing_ratio = 0.5

    def wrap_line(line, max_width, font, thickness, scale):
        """
        Wrap a single line of text into multiple lines such that each line's
        width (measured at the given scale) does not exceed max_width.
        This function preserves the original spacing by splitting the line into tokens
        (words and whitespace) using a regular expression.

        Parameters:
            line (str): The input text line.
            max_width (int): Maximum allowed width in pixels.
            font (int): OpenCV font identifier.
            thickness (int): Text thickness.
            scale (float): The current font scale.

        Returns:
            List[str]: A list of wrapped lines; always at least one entry, so an
            empty input line is kept as a blank line.
        """
        # Split the line into tokens (words and whitespace), preserving spacing.
        # re.split always returns at least one element ('' for an empty line).
        tokens = re.split(r'(\s+)', line)

        wrapped_lines = []
        current_line = ""
        for token in tokens:
            candidate = current_line + token
            candidate_width = cv2.getTextSize(candidate, font, scale, thickness)[0][0]
            if candidate_width <= max_width:
                current_line = candidate
            elif current_line == "":
                # The token itself is too wide for an empty line;
                # break the token character by character.
                sub_token = ""
                for char in token:
                    candidate_char = sub_token + char
                    if cv2.getTextSize(candidate_char, font, scale, thickness)[0][0] <= max_width:
                        sub_token = candidate_char
                    else:
                        if sub_token:
                            wrapped_lines.append(sub_token)
                        sub_token = char
                current_line = sub_token
            else:
                # Start a new line with the token that did not fit.
                wrapped_lines.append(current_line)
                current_line = token
        # Emit the trailing line. Also emit an empty string when nothing was
        # produced at all, so that an explicit blank line is not dropped.
        if current_line or not wrapped_lines:
            wrapped_lines.append(current_line)
        return wrapped_lines

    def compute_text_block(scale):
        """
        Wrap the entire text (splitting at explicit newline characters) using the
        provided scale, and then compute the overall width and height of the text block.

        Returns:
            wrapped_lines (List[str]): The list of wrapped lines.
            block_width (int): Maximum width among the wrapped lines.
            block_height (int): Total height of the text block including spacing.
            sizes (List[tuple]): A list of (width, height) for each wrapped line.
            spacing (int): The spacing between lines (computed from the scaled "A" height).
        """
        # Split text by explicit newlines
        input_lines = text.splitlines() if text else ['']
        wrapped_lines = []
        for line in input_lines:
            wrapped_lines.extend(wrap_line(line, avail_width, font, thickness, scale))

        # (width, height) of every wrapped line at this scale
        sizes = [cv2.getTextSize(line, font, scale, thickness)[0] for line in wrapped_lines]

        block_width = max((w for w, h in sizes), default=0)
        # Use the height of "A" (at the current scale) to compute line spacing
        base_height = cv2.getTextSize("A", font, scale, thickness)[0][1]
        spacing = int(line_spacing_ratio * base_height)
        if sizes:
            block_height = sum(h for w, h in sizes) + spacing * (len(sizes) - 1)
        else:
            block_height = 0

        return wrapped_lines, block_width, block_height, sizes, spacing

    # Use binary search to find the maximum scale that allows the text block to fit
    lo = 0.001
    hi = max_size
    eps = 0.001  # convergence threshold
    best_scale = lo
    best_result = None

    while hi - lo > eps:
        mid = (lo + hi) / 2
        wrapped_lines, block_width, block_height, sizes, spacing = compute_text_block(mid)
        # Ensure that both width and height constraints are met
        if block_width <= avail_width and block_height <= avail_height:
            best_scale = mid
            best_result = (wrapped_lines, block_width, block_height, sizes, spacing)
            lo = mid  # try a larger scale
        else:
            hi = mid  # reduce the scale

    if best_result is None:
        # No tested scale fits (or the search range was empty): fall back to a
        # fixed scale and draw anyway; the text may overflow the image.
        best_scale = 0.5
        best_result = compute_text_block(best_scale)

    wrapped_lines, block_width, block_height, sizes, spacing = best_result

    # Compute starting y-coordinate based on vertical alignment flag
    if v_align == "top":
        y_top = margin
    elif v_align == "bottom":
        y_top = margin + (avail_height - block_height)
    else:
        # "center", and the default for any invalid flag
        y_top = margin + (avail_height - block_height) // 2

    # For cv2.putText, the y coordinate represents the text baseline;
    # so for the first line add its height.
    y = y_top + (sizes[0][1] if sizes else 0)

    # Draw each line with horizontal alignment based on the flag
    for line, (line_width, line_height) in zip(wrapped_lines, sizes):
        if h_align == "center":
            x = margin + (avail_width - line_width) // 2
        elif h_align == "right":
            x = margin + (avail_width - line_width)
        else:
            # "left", and the default for any invalid flag
            x = margin

        cv2.putText(img, line, (x, y), font, best_scale, (0, 0, 0), thickness, cv2.LINE_AA)
        y += line_height + spacing

    return img
|
|
|
|
|
|
def save_image_with_notes(img, path, notes=None):
    """
    Save an image to disk with an optional text note in a strip below it.

    Parameters:
        img (torch.Tensor or numpy.ndarray): The image. A tensor is moved to
            the CPU and its axes are permuted with transpose(1, 2, 0) -
            presumably (C, H, W) -> (H, W, C); confirm against callers.
            Floating-point data is multiplied by 255, clipped to [0, 255]
            and cast to uint8. The image is treated as RGB.
        path (str): Destination file path handed to cv2.imwrite.
        notes (str, optional): Text forwarded to `notes_on_image`.
    """
    if isinstance(img, torch.Tensor):
        # detach() first: Tensor.numpy() raises on tensors that require grad.
        img = img.detach().cpu().numpy().transpose(1, 2, 0)
    # Cover every floating dtype (float16 included), not only float32/64.
    if np.issubdtype(img.dtype, np.floating):
        img = np.clip(img * 255, 0, 255).astype(np.uint8)
    img = notes_on_image(img, notes)
    # OpenCV writes BGR, so convert from the RGB layout used up to here.
    cv2.imwrite(path, cv2.cvtColor(img, cv2.COLOR_RGB2BGR))
|
|
|
|
|
|
# debug utils
|
|
|
|
def atol(x, y):
    """
    Element-wise absolute difference |x - y| between two tensors.
    """
    difference = x - y
    return torch.abs(difference)
|
|
|
|
|
|
def rtol(x, y):
    """
    Element-wise relative difference between two tensors.

    Computed as |x - y| divided by max(|x|, |y|); the denominator is
    clamped to at least 1e-12 so that two zeros give 0 instead of NaN.
    """
    abs_diff = torch.abs(x - y)
    magnitude = torch.maximum(torch.abs(x), torch.abs(y))
    return abs_diff / torch.clamp_min(magnitude, 1e-12)
|
|
|
|
|
|
# print utils
|
|
def indent(s, n=4):
    """
    Indent every line of a string except the first by `n` spaces.

    Lines are split on '\\n' only; the first line is returned unchanged.
    """
    first, *rest = s.split('\n')
    pad = ' ' * n
    return '\n'.join([first] + [pad + line for line in rest])
|
|
|