fix: refine workflow, enforce minimum font size, tune params
Browse files- font_dataset/layout.py +196 -212
font_dataset/layout.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
__all__ = ["
|
| 2 |
|
| 3 |
|
| 4 |
epislon = 1e-6
|
|
@@ -47,8 +47,8 @@ stroke_width_max_ratio = 0.25
|
|
| 47 |
assert gray_ratio + color_ratio - 1 < epislon
|
| 48 |
|
| 49 |
# clip size ratio
|
| 50 |
-
clip_width_max_ratio = 0.
|
| 51 |
-
clip_width_min_ratio = 0.
|
| 52 |
clip_width_height_min_ratio = 0.75
|
| 53 |
clip_width_height_max_ratio = 1.25
|
| 54 |
|
|
@@ -69,9 +69,9 @@ assert no_rotation_ratio + rotation_ratio - 1 < epislon
|
|
| 69 |
# in degree
|
| 70 |
rotation_max_angle = 30
|
| 71 |
|
| 72 |
-
|
| 73 |
-
cjk_ratio = 3
|
| 74 |
|
|
|
|
| 75 |
cjk_distribution = {
|
| 76 |
"ja": 0.3,
|
| 77 |
"ko": 0.2,
|
|
@@ -83,17 +83,10 @@ cjk_distribution = {
|
|
| 83 |
|
| 84 |
assert sum(cjk_distribution.values()) - 1 < epislon
|
| 85 |
|
| 86 |
-
train_cnt = 100
|
| 87 |
-
val_cnt = 10
|
| 88 |
-
test_cnt = 30
|
| 89 |
-
|
| 90 |
-
train_cnt_cjk = int(train_cnt * cjk_ratio)
|
| 91 |
-
val_cnt_cjk = int(val_cnt * cjk_ratio)
|
| 92 |
-
test_cnt_cjk = int(test_cnt * cjk_ratio)
|
| 93 |
-
|
| 94 |
|
| 95 |
import math
|
| 96 |
import random
|
|
|
|
| 97 |
from PIL import Image, ImageDraw, ImageFont
|
| 98 |
from .fontlabel import FontLabel
|
| 99 |
from .font import DSFont
|
|
@@ -242,209 +235,200 @@ def RGB2RGBA(color):
|
|
| 242 |
return color + (255,)
|
| 243 |
|
| 244 |
|
| 245 |
-
def
|
| 246 |
img_path: str, font: DSFont, corpus_manager: CorpusGeneratorManager
|
| 247 |
) -> tuple[Image.Image, FontLabel]:
|
| 248 |
-
|
| 249 |
-
|
| 250 |
-
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
|
| 254 |
-
|
| 255 |
-
|
| 256 |
-
|
| 257 |
-
|
| 258 |
-
|
| 259 |
-
|
| 260 |
-
|
| 261 |
-
|
| 262 |
-
|
| 263 |
-
|
| 264 |
-
|
| 265 |
-
|
| 266 |
-
|
| 267 |
-
|
| 268 |
-
|
| 269 |
-
|
| 270 |
-
|
| 271 |
-
|
| 272 |
-
|
| 273 |
-
|
| 274 |
-
|
| 275 |
-
|
| 276 |
-
|
| 277 |
-
|
| 278 |
|
| 279 |
-
|
| 280 |
-
|
| 281 |
-
|
| 282 |
-
|
| 283 |
-
|
| 284 |
-
|
| 285 |
-
|
| 286 |
-
|
| 287 |
-
|
| 288 |
-
|
| 289 |
-
|
| 290 |
-
|
| 291 |
-
|
| 292 |
-
|
| 293 |
-
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
|
| 297 |
-
|
| 298 |
-
|
| 299 |
-
|
| 300 |
-
|
| 301 |
-
|
| 302 |
-
|
| 303 |
-
|
| 304 |
-
|
| 305 |
-
|
| 306 |
-
|
| 307 |
-
|
| 308 |
-
|
| 309 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 310 |
|
| 311 |
-
|
| 312 |
-
|
| 313 |
-
|
| 314 |
-
|
| 315 |
-
|
| 316 |
-
|
| 317 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 318 |
|
| 319 |
-
|
| 320 |
-
|
| 321 |
-
|
| 322 |
-
|
| 323 |
-
|
| 324 |
-
|
| 325 |
-
|
| 326 |
-
|
| 327 |
-
|
| 328 |
-
|
| 329 |
-
|
| 330 |
-
|
| 331 |
-
|
| 332 |
-
|
| 333 |
-
|
| 334 |
-
|
| 335 |
-
|
| 336 |
-
|
| 337 |
-
if random.random() < no_rotation_ratio:
|
| 338 |
-
render_angle = 0
|
| 339 |
-
|
| 340 |
-
render_calculation_width = render_calculation_width_no_rotation
|
| 341 |
-
render_calculation_height = render_calculation_height_no_rotation
|
| 342 |
-
else:
|
| 343 |
-
render_angle = random.randint(-rotation_max_angle, rotation_max_angle)
|
| 344 |
-
|
| 345 |
-
render_calculation_width = int(
|
| 346 |
-
render_calculation_width_no_rotation
|
| 347 |
-
* math.cos(math.radians(abs(render_angle)))
|
| 348 |
-
+ render_calculation_height_no_rotation
|
| 349 |
-
* math.sin(math.radians(abs(render_angle)))
|
| 350 |
-
)
|
| 351 |
-
render_calculation_height = int(
|
| 352 |
-
render_calculation_width_no_rotation
|
| 353 |
-
* math.sin(math.radians(abs(render_angle)))
|
| 354 |
-
+ render_calculation_height_no_rotation
|
| 355 |
-
* math.cos(math.radians(abs(render_angle)))
|
| 356 |
-
)
|
| 357 |
-
|
| 358 |
-
# calculate render size
|
| 359 |
-
render_ratio = (
|
| 360 |
-
random.random() * (text_longer_max_ratio - text_longer_min_ratio)
|
| 361 |
-
+ text_longer_min_ratio
|
| 362 |
-
)
|
| 363 |
-
if (
|
| 364 |
-
render_calculation_width / render_calculation_height
|
| 365 |
-
< clip_width / clip_height
|
| 366 |
-
):
|
| 367 |
-
# height is the limit
|
| 368 |
-
render_height = int(clip_height * render_ratio)
|
| 369 |
-
render_width = int(
|
| 370 |
-
render_calculation_width / render_calculation_height * render_height
|
| 371 |
-
)
|
| 372 |
-
else:
|
| 373 |
-
# width is the limit
|
| 374 |
-
render_width = int(clip_width * render_ratio)
|
| 375 |
-
render_height = int(
|
| 376 |
-
render_calculation_height / render_calculation_width * render_width
|
| 377 |
-
)
|
| 378 |
-
|
| 379 |
-
# calculate text size
|
| 380 |
-
text_size = int(
|
| 381 |
-
render_calculation_size * render_height / render_calculation_height
|
| 382 |
-
)
|
| 383 |
-
render_width_no_rotation = int(
|
| 384 |
-
render_calculation_width_no_rotation
|
| 385 |
-
/ render_calculation_height
|
| 386 |
-
* render_height
|
| 387 |
-
)
|
| 388 |
-
render_height_no_rotation = int(
|
| 389 |
-
render_calculation_height_no_rotation
|
| 390 |
-
/ render_calculation_height
|
| 391 |
-
* render_height
|
| 392 |
-
)
|
| 393 |
-
render_font_x_no_rotation = int(
|
| 394 |
-
render_calculation_font_x_no_rotation
|
| 395 |
-
/ render_calculation_height
|
| 396 |
-
* render_height
|
| 397 |
-
)
|
| 398 |
-
render_font_y_no_rotation = int(
|
| 399 |
-
render_calculation_font_y_no_rotation
|
| 400 |
-
/ render_calculation_height
|
| 401 |
-
* render_height
|
| 402 |
-
)
|
| 403 |
-
stroke_width = int(text_size * stroke_ratio)
|
| 404 |
-
line_spacing = int(text_size * line_spacing_ratio)
|
| 405 |
|
| 406 |
-
|
| 407 |
-
|
| 408 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 409 |
|
| 410 |
-
|
| 411 |
-
|
| 412 |
-
|
| 413 |
-
|
| 414 |
-
|
| 415 |
-
|
| 416 |
-
|
| 417 |
-
|
| 418 |
-
|
| 419 |
-
|
| 420 |
-
|
| 421 |
-
|
| 422 |
-
|
| 423 |
-
|
| 424 |
-
|
| 425 |
-
|
| 426 |
-
language=render_language,
|
| 427 |
-
)
|
| 428 |
-
if rotation_max_angle != 0:
|
| 429 |
-
font_image = font_image.rotate(
|
| 430 |
-
render_angle, expand=True, fillcolor=(0, 0, 0, 0)
|
| 431 |
-
)
|
| 432 |
-
|
| 433 |
-
im.paste(font_image, (render_x, render_y), font_image)
|
| 434 |
-
return im, FontLabel(
|
| 435 |
-
clip_width,
|
| 436 |
-
clip_height,
|
| 437 |
-
text,
|
| 438 |
-
font,
|
| 439 |
-
text_color,
|
| 440 |
-
text_size,
|
| 441 |
-
text_direction,
|
| 442 |
-
stroke_width,
|
| 443 |
-
stroke_color,
|
| 444 |
-
line_spacing,
|
| 445 |
-
render_language,
|
| 446 |
-
(render_x, render_y, render_width, render_height),
|
| 447 |
-
render_angle,
|
| 448 |
-
)
|
| 449 |
-
except Exception as e:
|
| 450 |
-
print(e)
|
|
|
|
| 1 |
+
__all__ = ["generate_font_image"]
|
| 2 |
|
| 3 |
|
| 4 |
epislon = 1e-6
|
|
|
|
| 47 |
# gray_ratio and color_ratio must sum to 1; abs() also rejects sums below 1,
# which the previous one-sided comparison silently accepted.
assert abs(gray_ratio + color_ratio - 1) < epislon
|
| 48 |
|
| 49 |
# clip size ratio
|
| 50 |
+
clip_width_max_ratio = 0.8
|
| 51 |
+
clip_width_min_ratio = 0.3
|
| 52 |
clip_width_height_min_ratio = 0.75
|
| 53 |
clip_width_height_max_ratio = 1.25
|
| 54 |
|
|
|
|
| 69 |
# maximum rotation angle, in degrees
|
| 70 |
rotation_max_angle = 30
|
| 71 |
|
| 72 |
+
text_size_min = 15
|
|
|
|
| 73 |
|
| 74 |
+
# sampling weights used to pick the render language for fonts tagged "CJK"
|
| 75 |
cjk_distribution = {
|
| 76 |
"ja": 0.3,
|
| 77 |
"ko": 0.2,
|
|
|
|
| 83 |
|
| 84 |
# The weights must sum to 1; abs() also rejects sums below 1, which the
# previous one-sided comparison silently accepted.
assert abs(sum(cjk_distribution.values()) - 1) < epislon
|
| 85 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 86 |
|
| 87 |
import math
|
| 88 |
import random
|
| 89 |
+
import traceback
|
| 90 |
from PIL import Image, ImageDraw, ImageFont
|
| 91 |
from .fontlabel import FontLabel
|
| 92 |
from .font import DSFont
|
|
|
|
| 235 |
return color + (255,)
|
| 236 |
|
| 237 |
|
| 238 |
+
def generate_font_image(
    img_path: str, font: DSFont, corpus_manager: CorpusGeneratorManager
) -> tuple[Image.Image, FontLabel]:
    """Render random text in ``font`` onto a random crop of a background image.

    The background at ``img_path`` is cropped to a random window, then a text
    sample from ``corpus_manager`` is drawn on it with randomly chosen
    language, direction, length class, color, stroke, line spacing, rotation
    and size. The text is first measured at ``render_calculation_size`` and
    every measurement is then scaled down to the final render size.

    Args:
        img_path: Path of the background image to open.
        font: Font to render with; ``font.path`` and ``font.language`` are read.
        corpus_manager: Source of the text; its ``generate(condition, font,
            language)`` method is called once.

    Returns:
        A tuple ``(image, label)`` where ``image`` is the cropped background
        with the text pasted on it (converted to mode ``"L"`` for gray
        samples) and ``label`` is the ``FontLabel`` describing the render.

    Raises:
        ValueError: If the crop window or the measured text box is empty, or
            if the resulting font size is below ``text_size_min``.
    """
    # Crop a random window out of the background. The context manager closes
    # the underlying file once the cropped copy has been taken.
    with Image.open(img_path) as source:
        width, height = source.size
        clip_width = random.randint(
            int(width * clip_width_min_ratio), int(width * clip_width_max_ratio)
        )
        clip_height = random.randint(
            int(clip_width * clip_width_height_min_ratio),
            int(clip_width * clip_width_height_max_ratio),
        )
        if clip_height > height:
            clip_height = height
        if clip_width <= 0 or clip_height <= 0:
            # A zero-sized crop would otherwise end in a ZeroDivisionError
            # when the aspect ratios are compared further down.
            raise ValueError("image is too small to crop")
        clip_x = random.randint(0, width - clip_width)
        clip_y = random.randint(0, height - clip_height)
        im = source.crop(
            (clip_x, clip_y, clip_x + clip_width, clip_y + clip_height)
        )

    # Language: generic tags are resolved to a concrete render language.
    render_language = font.language
    if render_language == "CJK":
        render_language = random.choices(
            list(cjk_distribution.keys()), list(cjk_distribution.values())
        )[0]
    elif render_language == "zh":
        render_language = random.choice(["zh-Hans", "zh-Hant"])

    # Text direction
    if random.random() < ltr_ratio:
        text_direction = "ltr"
    else:
        text_direction = "ttb"

    # Text length: a single draw is compared against cumulative thresholds.
    # Drawing a second random number for the median branch made its
    # probability (1 - short_ratio) * median_ratio instead of median_ratio.
    length_draw = random.random()
    if length_draw < short_ratio:
        length_condition = short_condition
    elif length_draw < short_ratio + median_ratio:
        length_condition = median_condition
    else:
        length_condition = long_condition
    text = corpus_manager.generate(length_condition, font, render_language)

    # Text color & stroke
    if random.random() < gray_ratio:
        gray_level = random.randint(0, 255)
        text_color = (gray_level, gray_level, gray_level)
        # no stroke in gray
        stroke_ratio = 0
        stroke_color = None
        im = im.convert("L")
    else:
        text_color = random_color()
        # whether to use a stroke
        if random.random() < pure_color_ratio:
            stroke_ratio = 0
            stroke_color = None
        else:
            stroke_ratio = random.random() * stroke_width_max_ratio
            stroke_color = random_color()

    # Line spacing, as a ratio of the font size
    line_spacing_ratio = (
        random.random() * (line_spacing_max_ratio - line_spacing_min_ratio)
        + line_spacing_min_ratio
    )

    # Measure the text at the reference size render_calculation_size.
    render_calculation_stroke_width = int(stroke_ratio * render_calculation_size)
    render_calculation_line_spacing = int(line_spacing_ratio * render_calculation_size)

    pil_font = ImageFont.truetype(font.path, size=render_calculation_size)
    text_bbox = render_bbox(
        ImageDraw.Draw(im),
        (0, 0),
        text,
        font=pil_font,
        direction=text_direction,
        spacing=render_calculation_line_spacing,
        stroke_width=render_calculation_stroke_width,
        language=render_language,
    )
    render_calculation_width_no_rotation = text_bbox[2] - text_bbox[0]
    render_calculation_height_no_rotation = text_bbox[3] - text_bbox[1]
    render_calculation_font_x_no_rotation = text_bbox[0]
    render_calculation_font_y_no_rotation = text_bbox[1]

    if random.random() < no_rotation_ratio:
        render_angle = 0

        render_calculation_width = render_calculation_width_no_rotation
        render_calculation_height = render_calculation_height_no_rotation
    else:
        render_angle = random.randint(-rotation_max_angle, rotation_max_angle)

        # Axis-aligned bounding box of the rotated text rectangle.
        theta = math.radians(abs(render_angle))
        render_calculation_width = int(
            render_calculation_width_no_rotation * math.cos(theta)
            + render_calculation_height_no_rotation * math.sin(theta)
        )
        render_calculation_height = int(
            render_calculation_width_no_rotation * math.sin(theta)
            + render_calculation_height_no_rotation * math.cos(theta)
        )

    if render_calculation_width <= 0 or render_calculation_height <= 0:
        # Empty measurement (e.g. nothing drawable in the text); reject it
        # instead of dividing by zero below.
        raise ValueError("text bounding box is empty")

    # Calculate the render size: the text's longer side, relative to the crop,
    # takes a random share of the crop.
    render_ratio = (
        random.random() * (text_longer_max_ratio - text_longer_min_ratio)
        + text_longer_min_ratio
    )
    if render_calculation_width / render_calculation_height < clip_width / clip_height:
        # height is the limit
        render_height = int(clip_height * render_ratio)
        render_width = int(
            render_calculation_width / render_calculation_height * render_height
        )
    else:
        # width is the limit
        render_width = int(clip_width * render_ratio)
        render_height = int(
            render_calculation_height / render_calculation_width * render_width
        )

    # Calculate the text size by scaling the reference size.
    text_size = int(render_calculation_size * render_height / render_calculation_height)

    if text_size < text_size_min:
        raise ValueError("text size is too small")

    # Scale the unrotated measurements by the same factor.
    render_width_no_rotation = int(
        render_calculation_width_no_rotation / render_calculation_height * render_height
    )
    render_height_no_rotation = int(
        render_calculation_height_no_rotation
        / render_calculation_height
        * render_height
    )
    render_font_x_no_rotation = int(
        render_calculation_font_x_no_rotation
        / render_calculation_height
        * render_height
    )
    render_font_y_no_rotation = int(
        render_calculation_font_y_no_rotation
        / render_calculation_height
        * render_height
    )
    stroke_width = int(text_size * stroke_ratio)
    line_spacing = int(text_size * line_spacing_ratio)

    # Calculate the render position inside the crop.
    render_x = random.randint(0, clip_width - render_width)
    render_y = random.randint(0, clip_height - render_height)

    # Draw the text on a transparent layer, rotate it, then paste it using its
    # own alpha channel as the mask.
    font_image = Image.new(
        "RGBA",
        (render_width_no_rotation, render_height_no_rotation),
        (0, 0, 0, 0),
    )
    pil_font = ImageFont.truetype(font.path, size=text_size)
    render_text(
        ImageDraw.Draw(font_image),
        # Offset by the bbox origin so the text starts at the layer's corner.
        (-render_font_x_no_rotation, -render_font_y_no_rotation),
        text,
        font=pil_font,
        fill=RGB2RGBA(text_color),
        direction=text_direction,
        spacing=line_spacing,
        stroke_width=stroke_width,
        stroke_fill=RGB2RGBA(stroke_color),
        language=render_language,
    )
    if rotation_max_angle != 0:
        font_image = font_image.rotate(
            render_angle, expand=True, fillcolor=(0, 0, 0, 0)
        )

    im.paste(font_image, (render_x, render_y), font_image)
    return im, FontLabel(
        clip_width,
        clip_height,
        text,
        font,
        text_color,
        text_size,
        text_direction,
        stroke_width,
        stroke_color,
        line_spacing,
        render_language,
        (render_x, render_y, render_width, render_height),
        render_angle,
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|