feat: more robust generation script
Browse files
- font_dataset/font.py +18 -1
- font_dataset/layout.py +7 -2
- font_ds_generate_script.py +31 -11
font_dataset/font.py
CHANGED
|
@@ -1,5 +1,7 @@
|
|
| 1 |
import yaml
|
| 2 |
import os
|
|
|
|
|
|
|
| 3 |
|
| 4 |
|
| 5 |
from .utils import get_files
|
|
@@ -37,7 +39,7 @@ def load_fonts(config_path="configs/font.yml"):
|
|
| 37 |
if rule is not None and not rule(file):
|
| 38 |
print("skip: " + file)
|
| 39 |
continue
|
| 40 |
-
font_list.append(DSFont(file, spec["language"]))
|
| 41 |
|
| 42 |
font_list.sort(key=lambda x: x.path)
|
| 43 |
|
|
@@ -51,3 +53,18 @@ def load_fonts(config_path="configs/font.yml"):
|
|
| 51 |
return False
|
| 52 |
|
| 53 |
return font_list, exclusion_rule
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import yaml
|
| 2 |
import os
|
| 3 |
+
from typing import Dict
|
| 4 |
+
import pickle
|
| 5 |
|
| 6 |
|
| 7 |
from .utils import get_files
|
|
|
|
| 39 |
if rule is not None and not rule(file):
|
| 40 |
print("skip: " + file)
|
| 41 |
continue
|
| 42 |
+
font_list.append(DSFont(str(file).replace("\\", "/"), spec["language"]))
|
| 43 |
|
| 44 |
font_list.sort(key=lambda x: x.path)
|
| 45 |
|
|
|
|
| 53 |
return False
|
| 54 |
|
| 55 |
return font_list, exclusion_rule
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
def load_font_with_exclusion(
    config_path="configs/font.yml", cache_path="font_list_cache.bin"
) -> Dict:
    """Return a mapping from font path to its index in the path-sorted font list.

    The mapping is cached as a pickle file at ``cache_path``. When that file
    already exists it is loaded and returned directly and ``config_path`` is
    not read. Otherwise the fonts are loaded with ``load_fonts``, the fonts
    matched by the returned exclusion rule are dropped, the rest are sorted by
    ``path`` and numbered, and the result is written to ``cache_path``.

    Args:
        config_path: font configuration file forwarded to ``load_fonts``.
        cache_path: pickle cache file to read from, or to create if missing.

    Returns:
        Dict mapping each kept font's ``path`` to its position in the sorted list.
    """
    if os.path.exists(cache_path):
        # NOTE(review): unpickling can execute arbitrary code; the cache file
        # must come from a trusted location.
        with open(cache_path, "rb") as f:
            return pickle.load(f)
    font_list, exclusion_rule = load_fonts(config_path)
    font_list = sorted(
        (font for font in font_list if not exclusion_rule(font)),
        key=lambda x: x.path,
    )
    print("font count: " + str(len(font_list)))
    ret = {font.path: index for index, font in enumerate(font_list)}
    # Write to the same path that is checked above, so a custom cache_path is
    # honoured instead of always writing "font_list_cache.bin".
    with open(cache_path, "wb") as f:
        pickle.dump(ret, f)
    return ret
|
font_dataset/layout.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
from typing import Tuple
|
| 2 |
|
| 3 |
-
__all__ = ["generate_font_image"]
|
| 4 |
|
| 5 |
|
| 6 |
epislon = 1e-6
|
|
@@ -237,6 +237,11 @@ def RGB2RGBA(color):
|
|
| 237 |
return color + (255,)
|
| 238 |
|
| 239 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 240 |
def generate_font_image(
|
| 241 |
img_path: str, font: DSFont, corpus_manager: CorpusGeneratorManager
|
| 242 |
) -> Tuple[Image.Image, FontLabel]:
|
|
@@ -368,7 +373,7 @@ def generate_font_image(
|
|
| 368 |
text_size = int(render_calculation_size * render_height / render_calculation_height)
|
| 369 |
|
| 370 |
if text_size < text_size_min:
|
| 371 |
-
raise
|
| 372 |
|
| 373 |
render_width_no_rotation = int(
|
| 374 |
render_calculation_width_no_rotation / render_calculation_height * render_height
|
|
|
|
| 1 |
from typing import Tuple
|
| 2 |
|
| 3 |
+
__all__ = ["generate_font_image", "TextSizeTooSmallException"]
|
| 4 |
|
| 5 |
|
| 6 |
epislon = 1e-6
|
|
|
|
| 237 |
return color + (255,)
|
| 238 |
|
| 239 |
|
| 240 |
+
class TextSizeTooSmallException(Exception):
    """Raised when the computed text size falls below the minimum text size."""

    def __init__(self):
        # Plain literal: the message has no placeholders, so no f-string is needed.
        super().__init__("Text Size Too Small")
|
| 243 |
+
|
| 244 |
+
|
| 245 |
def generate_font_image(
|
| 246 |
img_path: str, font: DSFont, corpus_manager: CorpusGeneratorManager
|
| 247 |
) -> Tuple[Image.Image, FontLabel]:
|
|
|
|
| 373 |
text_size = int(render_calculation_size * render_height / render_calculation_height)
|
| 374 |
|
| 375 |
if text_size < text_size_min:
|
| 376 |
+
raise TextSizeTooSmallException()
|
| 377 |
|
| 378 |
render_width_no_rotation = int(
|
| 379 |
render_calculation_width_no_rotation / render_calculation_height * render_height
|
font_ds_generate_script.py
CHANGED
|
@@ -5,8 +5,8 @@ import os
|
|
| 5 |
import concurrent.futures
|
| 6 |
from tqdm import tqdm
|
| 7 |
import time
|
| 8 |
-
from font_dataset.font import load_fonts
|
| 9 |
-
from font_dataset.layout import generate_font_image
|
| 10 |
from font_dataset.text import CorpusGeneratorManager, UnqualifiedFontException
|
| 11 |
from font_dataset.background import background_image_generator
|
| 12 |
|
|
@@ -39,9 +39,27 @@ corpus_manager = CorpusGeneratorManager()
|
|
| 39 |
images = background_image_generator()
|
| 40 |
|
| 41 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
def generate_dataset(dataset_type: str, cnt: int):
|
| 43 |
-
|
| 44 |
-
os.makedirs(
|
| 45 |
|
| 46 |
def _generate_single(args):
|
| 47 |
i, j, font = args
|
|
@@ -61,8 +79,8 @@ def generate_dataset(dataset_type: str, cnt: int):
|
|
| 61 |
image_file_name = f"font_{i}_img_{j}.jpg"
|
| 62 |
label_file_name = f"font_{i}_img_{j}.bin"
|
| 63 |
|
| 64 |
-
image_file_path = os.path.join(
|
| 65 |
-
label_file_path = os.path.join(
|
| 66 |
|
| 67 |
# detect cache
|
| 68 |
if os.path.exists(image_file_path) and os.path.exists(label_file_path):
|
|
@@ -79,14 +97,16 @@ def generate_dataset(dataset_type: str, cnt: int):
|
|
| 79 |
pickle.dump(label, open(label_file_path, "wb"))
|
| 80 |
return
|
| 81 |
except UnqualifiedFontException as e:
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
with open(unqualified_log_file_name, "a+") as f:
|
| 85 |
-
f.write(f"{e.font.path}\n")
|
| 86 |
return
|
| 87 |
-
except
|
| 88 |
traceback.print_exc()
|
| 89 |
continue
|
|
|
|
|
|
|
|
|
|
|
|
|
| 90 |
|
| 91 |
work_list = []
|
| 92 |
|
|
|
|
| 5 |
import concurrent.futures
|
| 6 |
from tqdm import tqdm
|
| 7 |
import time
|
| 8 |
+
from font_dataset.font import load_fonts, DSFont
|
| 9 |
+
from font_dataset.layout import generate_font_image, TextSizeTooSmallException
|
| 10 |
from font_dataset.text import CorpusGeneratorManager, UnqualifiedFontException
|
| 11 |
from font_dataset.background import background_image_generator
|
| 12 |
|
|
|
|
| 39 |
images = background_image_generator()
|
| 40 |
|
| 41 |
|
| 42 |
+
def add_exclusion(font: DSFont, reason: str, dataset_base_dir: str, i: int, j: int):
    """Exclude ``font`` at runtime and delete the samples already written for it.

    The font path is appended to ``runtime_exclusion_list`` and logged, with
    the reason, to the file named by ``unqualified_log_file_name``. Then the
    image/label pairs ``font_{i}_img_{0..j}`` are removed from
    ``dataset_base_dir`` if they exist.

    Args:
        font: the font being excluded; only ``font.path`` is used.
        reason: explanation printed and written to the log.
        dataset_base_dir: directory holding the generated image/label files.
        i: index of the font, as used in the generated file names.
        j: index of the image being generated when the failure occurred;
            images ``0`` through ``j`` of this font are removed.
    """
    print(f"Excluded font: {font.path}, reason: {reason}")
    runtime_exclusion_list.append(font.path)
    with open(unqualified_log_file_name, "a+") as f:
        f.write(f"{font.path} # {reason}\n")
    # Iterate over image indices with a dedicated variable. Reusing ``i`` here
    # would overwrite the font index and delete other fonts' files.
    for image_index in range(j + 1):
        image_file_name = f"font_{i}_img_{image_index}.jpg"
        label_file_name = f"font_{i}_img_{image_index}.bin"

        image_file_path = os.path.join(dataset_base_dir, image_file_name)
        label_file_path = os.path.join(dataset_base_dir, label_file_name)

        if os.path.exists(image_file_path):
            os.remove(image_file_path)
        if os.path.exists(label_file_path):
            os.remove(label_file_path)
|
| 58 |
+
|
| 59 |
+
|
| 60 |
def generate_dataset(dataset_type: str, cnt: int):
|
| 61 |
+
dataset_base_dir = os.path.join(dataset_path, dataset_type)
|
| 62 |
+
os.makedirs(dataset_base_dir, exist_ok=True)
|
| 63 |
|
| 64 |
def _generate_single(args):
|
| 65 |
i, j, font = args
|
|
|
|
| 79 |
image_file_name = f"font_{i}_img_{j}.jpg"
|
| 80 |
label_file_name = f"font_{i}_img_{j}.bin"
|
| 81 |
|
| 82 |
+
image_file_path = os.path.join(dataset_base_dir, image_file_name)
|
| 83 |
+
label_file_path = os.path.join(dataset_base_dir, label_file_name)
|
| 84 |
|
| 85 |
# detect cache
|
| 86 |
if os.path.exists(image_file_path) and os.path.exists(label_file_path):
|
|
|
|
| 97 |
pickle.dump(label, open(label_file_path, "wb"))
|
| 98 |
return
|
| 99 |
except UnqualifiedFontException as e:
|
| 100 |
+
traceback.print_exc()
|
| 101 |
+
add_exclusion(font, "unqualified font", dataset_base_dir, i, j)
|
|
|
|
|
|
|
| 102 |
return
|
| 103 |
+
except TextSizeTooSmallException as e:
|
| 104 |
traceback.print_exc()
|
| 105 |
continue
|
| 106 |
+
except Exception as e:
|
| 107 |
+
traceback.print_exc()
|
| 108 |
+
add_exclusion(font, f"other: {repr(e)}", dataset_base_dir, i, j)
|
| 109 |
+
return
|
| 110 |
|
| 111 |
work_list = []
|
| 112 |
|