File size: 3,681 Bytes
7d394f6
 
 
346b427
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c8cc838
7d394f6
 
 
 
 
04c0f3a
 
d79b562
 
04c0f3a
 
 
7d394f6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
346b427
7d394f6
04c0f3a
7d394f6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
346b427
 
 
 
 
 
 
 
 
 
7d394f6
 
 
c8cc838
04c0f3a
7d394f6
 
346b427
 
7d394f6
346b427
e025409
346b427
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
import os
# Install the ScrabbleGAN package straight from GitHub every time this script
# starts. This runs before the `scgan` imports further down, presumably
# because the package is not preinstalled in the runtime environment.
# NOTE(review): the exit status of the pip command is not checked here.
os.system("pip install git+https://github.com/ai-forever/ScrabbleGAN")

import numpy as np
import cv2

import gradio as gr
from huggingface_hub import hf_hub_download

from scgan.config import Config
from scgan.generate_images import ImgGenerator


def download_weights(repo_id):
    """Fetch the character map and the model checkpoint from the Hub.

    Args:
        repo_id: Hugging Face Hub repository id that hosts both files.

    Returns:
        Tuple ``(char_map_path, weights_path)`` holding the values returned
        by ``hf_hub_download`` for ``char_map.pkl`` and
        ``model_checkpoint_epoch_200.pth.tar`` respectively.
    """
    filenames = ("char_map.pkl", "model_checkpoint_epoch_200.pth.tar")
    # The generator is consumed in order, so the char map is requested first.
    char_map_path, weights_path = (
        hf_hub_download(repo_id, filename) for filename in filenames
    )
    return char_map_path, weights_path


def get_text_from_image(img):
    """Copy the pixels selected by an HSV threshold onto a white background.

    The image is converted RGB -> HSV, thresholded with ``cv2.inRange`` and
    converted back to RGB. Pixels whose HSV value lies inside the range are
    kept; every other pixel becomes white (255).

    Args:
        img: Image accepted by ``cv2.cvtColor`` with ``cv2.COLOR_RGB2HSV``
            (presumably an H x W x 3 uint8 RGB array - TODO confirm).

    Returns:
        A new uint8 array with the same shape as the converted image.
    """
    lower_hsv = np.array([0, 0, 0], np.uint8)
    upper_hsv = np.array([250, 250, 160], np.uint8)

    hsv = cv2.cvtColor(img, cv2.COLOR_RGB2HSV)
    keep = cv2.inRange(hsv, lower_hsv, upper_hsv).astype(bool)
    # The kept pixels are taken from the HSV -> RGB round trip, not from the
    # untouched input.
    rgb = cv2.cvtColor(hsv, cv2.COLOR_HSV2RGB)

    result = np.full(rgb.shape, 255, dtype=np.uint8)
    result[keep] = rgb[keep]
    return result


def split_text_to_rows(text, n):
    """Group the whitespace-separated words of ``text`` into rows.

    Args:
        text: Input string; it is split on any whitespace.
        n: Maximum number of words per row.

    Returns:
        List of strings, each made of up to ``n`` words joined by a single
        space. The last row may hold fewer words.
    """
    # https://stackoverflow.com/a/6187258
    words = text.split()
    rows = []
    for start in range(0, len(words), n):
        rows.append(' '.join(words[start:start + n]))
    return rows


def split_text_to_rows_by_chars(text, n):
    """Cut ``text`` into consecutive chunks of at most ``n`` characters.

    Chunk boundaries ignore word boundaries. Each chunk is stripped of
    leading and trailing whitespace, so a chunk made only of whitespace
    becomes an empty string.

    Args:
        text: Input string.
        n: Maximum number of characters per chunk before stripping.

    Returns:
        List of stripped chunks in their original order.
    """
    return [text[start:start + n].strip() for start in range(0, len(text), n)]


def remove_right_padding(img, len_text, char_w=32):
    """Keep only the leftmost ``len_text * char_w`` columns of ``img``.

    Args:
        img: Array indexed as ``[rows, columns, ...]``.
        len_text: Number of characters drawn on the image.
        char_w: Width in pixels reserved per character; the default is the
            standard ScrabbleGAN character width.

    Returns:
        The column slice of ``img`` that covers the text.
    """
    text_width = len_text * char_w
    return img[:, :text_width]


def split_list2batches(lst, batch_size):
    """Split a list into consecutive batches of at most ``batch_size`` items.

    Args:
        lst: Sequence to split.
        batch_size: Maximum number of items per batch.

    Returns:
        List of slices of ``lst``; the last one may be shorter.
    """
    batches = []
    for start in range(0, len(lst), batch_size):
        batches.append(lst[start:start + batch_size])
    return batches


def get_canvas_size(images, row_width, left_pad):
    """Compute the canvas size needed to stack ``images`` vertically.

    Every image contributes ``height * row_width`` to the canvas height, so
    a ``row_width`` below 1 makes consecutive rows overlap. The last image
    is given its full height so that it is not cut off at the bottom.

    Args:
        images: Sequence of arrays whose ``shape`` starts with
            ``(height, width)``.
        row_width: Fraction of each image height used as the vertical step
            between rows.
        left_pad: Number of pixels added to the widest image width.

    Returns:
        Tuple ``(canvas_height, canvas_width)``. ``canvas_height`` is an int.
        For an empty ``images`` the result is ``(0, left_pad)``.
    """
    # Nothing to draw: avoid the IndexError that images[-1] would raise.
    if len(images) == 0:
        return 0, left_pad

    canvas_width = 0
    canvas_height = 0
    for image in images:
        h, w = image.shape[:2]
        canvas_height += h * row_width
        canvas_width = max(canvas_width, w)
    canvas_width += left_pad

    # expand canvas to the height of the last image
    # (to correct the effect of rows shrinking)
    last_h = images[-1].shape[0]
    canvas_height += last_h - last_h * row_width
    return int(canvas_height), canvas_width


def predict(text):
    """Render ``text`` as stacked rows of generated handwriting.

    The text is split into rows at ``NEW_LINE_SYMB`` when that marker is
    present, otherwise into chunks of ``CHARS_IN_ROW`` characters. Rows are
    generated in batches of ``BATCH_SIZE``, cropped to the text width, put
    on a white background and pasted onto a white canvas from top to
    bottom, each row shifted right by ``LEFT_PAD`` pixels.

    Args:
        text: Text entered by the user.

    Returns:
        A uint8 canvas of shape ``(height, width, 3)`` filled with white
        and the rendered rows, or ``None`` when there is nothing to draw
        (empty or whitespace-only input).
    """
    if NEW_LINE_SYMB in text:
        rows = [row.strip() for row in text.split(NEW_LINE_SYMB)]
    else:
        rows = split_text_to_rows_by_chars(text, CHARS_IN_ROW)

    # Blank rows carry no characters to render; they would presumably be
    # cropped to zero width by remove_right_padding, so they are dropped.
    rows = [row for row in rows if row]
    if not rows:
        return None

    images_on_white = []
    for rows_batch in split_list2batches(rows, BATCH_SIZE):
        imgs, texts_on_image = GENERATOR.generate(word_list=rows_batch)
        for img, text_on_image in zip(imgs, texts_on_image):
            # Crop by the text the generator reports, not by the input row.
            cropped_image = remove_right_padding(img, len(text_on_image))
            images_on_white.append(get_text_from_image(cropped_image))

    # Guard against a generator that returned no images at all.
    if not images_on_white:
        return None

    canvas_height, canvas_width = get_canvas_size(
        images_on_white, ROW_WIDTH, LEFT_PAD)
    canvas = np.full((canvas_height, canvas_width, 3), 255, dtype=np.uint8)

    start_draw = 0
    for image_on_white in images_on_white:
        h, w = image_on_white.shape[:2]
        canvas[start_draw:start_draw + h, LEFT_PAD:LEFT_PAD + w] = image_on_white
        # Advance by a fraction of the row height so that rows sit closer.
        start_draw += int(h * ROW_WIDTH)
    return canvas


# Download the model files at import time; both paths feed the generator below.
CHAR_MAP_PATH, WEIGHTS_PATH = download_weights("sberbank-ai/scrabblegan-peter")

# Single generator instance shared by every predict() call.
GENERATOR = ImgGenerator(
    checkpt_path=WEIGHTS_PATH,
    config=Config,
    char_map_path=CHAR_MAP_PATH
)

# Number of text rows passed to GENERATOR.generate() per call.
BATCH_SIZE = 3
# Fraction of a row image's height used as the vertical step between rows.
ROW_WIDTH = 0.7
# Horizontal offset, in pixels, at which every row is pasted on the canvas.
LEFT_PAD = 10
# NOTE(review): not referenced anywhere in the visible code.
WORDS_IN_ROW = 4
# Maximum characters per row when the text has no explicit line breaks.
CHARS_IN_ROW = 40
# Marker the user types to force a line break.
NEW_LINE_SYMB = '{n}'

# Build the Gradio UI (one text box in, one image out, handled by predict)
# and launch it as soon as the module is executed.
gr.Interface(
    predict,
    inputs=gr.Textbox(label=f"Type your text (RU) to generate it on an image. The text will be automatically splitted on lines, or you can use a new line symbol {NEW_LINE_SYMB}"),
    outputs=gr.Image(label="Generated image"),
    title="Peter the Great handwritten image generation",
).launch()