feat: add exclusion rules
Browse files
- configs/font.yml +3 -0
- font_dataset/font.py +12 -3
- font_ds_generate_script.py +10 -3
- font_ds_stat.py +5 -1
configs/font.yml
CHANGED
|
@@ -11,6 +11,9 @@
|
|
| 11 |
# - Others
|
| 12 |
dataset:
|
| 13 |
path: ./dataset/fonts
|
|
|
|
|
|
|
|
|
|
| 14 |
specs:
|
| 15 |
- path:
|
| 16 |
- ./Adobe/CJK
|
|
|
|
| 11 |
# - Others
|
| 12 |
dataset:
|
| 13 |
path: ./dataset/fonts
|
| 14 |
+
exclusion:
|
| 15 |
+
- ./Founder Type(方正)/韩文/方正朝文中圆.TTF
|
| 16 |
+
- ./Founder Type(方正)/简繁/ttf/方正宋体S-超大字符集(SIP).TTF
|
| 17 |
specs:
|
| 18 |
- path:
|
| 19 |
- ./Adobe/CJK
|
font_dataset/font.py
CHANGED
|
@@ -1,6 +1,5 @@
|
|
| 1 |
import yaml
|
| 2 |
import os
|
| 3 |
-
from typing import List
|
| 4 |
|
| 5 |
|
| 6 |
from .utils import get_files
|
|
@@ -15,7 +14,7 @@ class DSFont:
|
|
| 15 |
self.language = language
|
| 16 |
|
| 17 |
|
| 18 |
-
def load_fonts(config_path="configs/font.yml") -> List[DSFont]:
|
| 19 |
with open(config_path, "r", encoding="utf-8") as f:
|
| 20 |
config = yaml.safe_load(f)
|
| 21 |
|
|
@@ -41,4 +40,14 @@ def load_fonts(config_path="configs/font.yml") -> List[DSFont]:
|
|
| 41 |
font_list.append(DSFont(file, spec["language"]))
|
| 42 |
|
| 43 |
font_list.sort(key=lambda x: x.path)
|
| 44 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import yaml
|
| 2 |
import os
|
|
|
|
| 3 |
|
| 4 |
|
| 5 |
from .utils import get_files
|
|
|
|
| 14 |
self.language = language
|
| 15 |
|
| 16 |
|
| 17 |
+
def load_fonts(config_path="configs/font.yml"):
|
| 18 |
with open(config_path, "r", encoding="utf-8") as f:
|
| 19 |
config = yaml.safe_load(f)
|
| 20 |
|
|
|
|
| 40 |
font_list.append(DSFont(file, spec["language"]))
|
| 41 |
|
| 42 |
font_list.sort(key=lambda x: x.path)
|
| 43 |
+
|
| 44 |
+
exclusion_list = ds_config["exclusion"]
|
| 45 |
+
exclusion_list = [os.path.join(ds_path, path) for path in exclusion_list]
|
| 46 |
+
|
| 47 |
+
def exclusion_rule(font: DSFont):
|
| 48 |
+
for exclusion in exclusion_list:
|
| 49 |
+
if os.path.samefile(font.path, exclusion):
|
| 50 |
+
return True
|
| 51 |
+
return False
|
| 52 |
+
|
| 53 |
+
return font_list, exclusion_rule
|
font_ds_generate_script.py
CHANGED
|
@@ -31,7 +31,7 @@ dataset_path = "./dataset/font_img"
|
|
| 31 |
os.makedirs(dataset_path, exist_ok=True)
|
| 32 |
|
| 33 |
|
| 34 |
-
fonts = load_fonts()
|
| 35 |
corpus_manager = CorpusGeneratorManager()
|
| 36 |
images = background_image_generator()
|
| 37 |
|
|
@@ -41,10 +41,17 @@ def generate_dataset(dataset_type: str, cnt: int):
|
|
| 41 |
os.makedirs(dataset_bath_dir, exist_ok=True)
|
| 42 |
|
| 43 |
def _generate_single(args):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
while True:
|
| 45 |
try:
|
| 46 |
-
i, j, font = args
|
| 47 |
-
|
| 48 |
image_file_name = f"font_{i}_img_{j}.jpg"
|
| 49 |
label_file_name = f"font_{i}_img_{j}.bin"
|
| 50 |
|
|
|
|
| 31 |
os.makedirs(dataset_path, exist_ok=True)
|
| 32 |
|
| 33 |
|
| 34 |
+
fonts, exclusion_rule = load_fonts()
|
| 35 |
corpus_manager = CorpusGeneratorManager()
|
| 36 |
images = background_image_generator()
|
| 37 |
|
|
|
|
| 41 |
os.makedirs(dataset_bath_dir, exist_ok=True)
|
| 42 |
|
| 43 |
def _generate_single(args):
|
| 44 |
+
i, j, font = args
|
| 45 |
+
print(
|
| 46 |
+
f"Generating {dataset_type} font: {font.path} {i} / {len(fonts)}, image {j}"
|
| 47 |
+
)
|
| 48 |
+
|
| 49 |
+
if exclusion_rule(font):
|
| 50 |
+
print(f"Excluded font: {font.path}")
|
| 51 |
+
return
|
| 52 |
+
|
| 53 |
while True:
|
| 54 |
try:
|
|
|
|
|
|
|
| 55 |
image_file_name = f"font_{i}_img_{j}.jpg"
|
| 56 |
label_file_name = f"font_{i}_img_{j}.bin"
|
| 57 |
|
font_ds_stat.py
CHANGED
|
@@ -23,12 +23,16 @@ test_cnt_cjk = int(test_cnt * cjk_ratio)
|
|
| 23 |
dataset_path = "./dataset/font_img"
|
| 24 |
os.makedirs(dataset_path, exist_ok=True)
|
| 25 |
|
| 26 |
-
fonts = load_fonts()
|
| 27 |
|
| 28 |
|
| 29 |
cnt = 0
|
| 30 |
|
| 31 |
for font in fonts:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
if font.language == "CJK":
|
| 33 |
cnt += cjk_ratio
|
| 34 |
else:
|
|
|
|
| 23 |
dataset_path = "./dataset/font_img"
|
| 24 |
os.makedirs(dataset_path, exist_ok=True)
|
| 25 |
|
| 26 |
+
fonts, exclusion_rule = load_fonts()
|
| 27 |
|
| 28 |
|
| 29 |
cnt = 0
|
| 30 |
|
| 31 |
for font in fonts:
|
| 32 |
+
if exclusion_rule(font):
|
| 33 |
+
print(f"Excluded font: {font.path}")
|
| 34 |
+
continue
|
| 35 |
+
|
| 36 |
if font.language == "CJK":
|
| 37 |
cnt += cjk_ratio
|
| 38 |
else:
|