File size: 4,331 Bytes
405302e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
def ocr_demo(image, task, ocr_type, ocr_box, ocr_color):
    """Run the selected OCR task and format its outputs for display.

    Delegates the actual work to ``process_image`` and expects a
    ``(text, html)`` pair back from it.

    Args:
        image: Input image, forwarded unchanged to ``process_image``.
        task: Name of the OCR task, forwarded unchanged.
        ocr_type: Forwarded unchanged to ``process_image``.
        ocr_box: Forwarded unchanged to ``process_image``.
        ocr_color: Forwarded unchanged to ``process_image``.

    Returns:
        A ``(text, iframe_html)`` tuple. ``text`` is the OCR result wrapped
        in ``$$ ... $$``. ``iframe_html`` is an ``<iframe>`` element that
        embeds the rendered HTML through its ``srcdoc`` attribute, or
        ``None`` when no rendered HTML was produced.
    """
    import html

    res, html_content = process_image(image, task, ocr_type, ocr_box, ocr_color)

    res = f"$$ {res} $$"

    if not html_content:
        return res, None

    # The document is placed inside a double-quoted attribute value, so it
    # must be attribute-escaped; otherwise the first '"' in the rendered
    # HTML terminates srcdoc and the rest leaks into the surrounding markup.
    escaped_doc = html.escape(html_content, quote=True)
    html_string = f'<iframe srcdoc="{escaped_doc}" width="100%" height="600px"></iframe>'
    return res, html_string

@spaces.GPU
def process_image(image, task, ocr_type=None, ocr_box=None, ocr_color=None):
    """Run the model for ``task`` and return text, rendered HTML and a run id.

    The render is written to a per-call temporary file inside
    ``results_folder`` so that calls do not share an output path.

    NOTE(review): a second ``process_image`` is defined later in this file
    and rebinds the name, so as the file stands this version is not the one
    callers get. It also returns three values, whereas ``ocr_demo`` unpacks
    two.

    Args:
        image: Input image, passed to the model as-is.
        task: One of "Plain Text OCR", "Format Text OCR",
            "Fine-grained OCR (Box)", "Fine-grained OCR (Color)",
            "Multi-crop OCR" or "Render Formatted OCR".
        ocr_type: OCR type forwarded to the model for the fine-grained tasks.
        ocr_box: Box argument forwarded for "Fine-grained OCR (Box)".
        ocr_color: Color argument forwarded for "Fine-grained OCR (Color)".

    Returns:
        ``(res, html_content, unique_id)``: the model output, the rendered
        HTML as a string (``None`` for plain-text OCR or when no render file
        is found), and a fresh UUID4 string.

    Raises:
        ValueError: If ``task`` is not one of the supported task names.
    """
    unique_id = str(uuid.uuid4())

    # Plain-text OCR produces no render, so return before creating any file.
    if task == "Plain Text OCR":
        res = model.chat(tokenizer, image, ocr_type='ocr')
        return res, None, unique_id

    # Select the model entry point and the task-specific keyword arguments.
    if task in ("Format Text OCR", "Render Formatted OCR"):
        run_model, task_kwargs = model.chat, {"ocr_type": 'format'}
    elif task == "Fine-grained OCR (Box)":
        run_model, task_kwargs = model.chat, {"ocr_type": ocr_type, "ocr_box": ocr_box}
    elif task == "Fine-grained OCR (Color)":
        run_model, task_kwargs = model.chat, {"ocr_type": ocr_type, "ocr_color": ocr_color}
    elif task == "Multi-crop OCR":
        run_model, task_kwargs = model.chat_crop, {"ocr_type": 'format'}
    else:
        # Previously an unknown task fell through and failed later with an
        # UnboundLocalError on ``res``.
        raise ValueError(f"Unsupported OCR task: {task!r}")

    demo_html = os.path.join(results_folder, "demo.html")
    html_file = os.path.join(results_folder, f"{task.replace(' ', '_').lower()}.html")
    tikz_file = os.path.join(results_folder, "tikz.html")

    # Only the unique path is needed; the model writes the file itself.
    with tempfile.NamedTemporaryFile(mode='w+', suffix='.html', delete=False, dir=results_folder) as temp_file:
        temp_html_path = temp_file.name

    try:
        res = run_model(tokenizer, image, render=True, save_render_file=temp_html_path, **task_kwargs)

        html_content = None
        if os.path.exists(temp_html_path):
            with open(temp_html_path, 'r') as f:
                # An untouched temp file is empty; treat that as "no render".
                html_content = f.read() or None

        # Consult the fixed-name files only when this call produced no render
        # of its own (the original code always let them override it).
        if html_content is None:
            for fallback_path in (demo_html, html_file, tikz_file):
                if os.path.exists(fallback_path):
                    with open(fallback_path, 'r') as f:
                        html_content = f.read()
                    break
    finally:
        # The temp file was created with delete=False, so remove it here.
        try:
            os.remove(temp_html_path)
        except FileNotFoundError:
            pass

    return res, html_content, unique_id

@spaces.GPU
def process_image(image, task, ocr_type=None, ocr_box=None, ocr_color=None):
    """Run the model for ``task`` and return its text and rendered HTML.

    Every task except "Plain Text OCR" asks the model to render to
    ``demo.html`` inside ``results_folder``; that file is then read back.

    Args:
        image: Input image, passed to the model as-is.
        task: One of "Plain Text OCR", "Format Text OCR",
            "Fine-grained OCR (Box)", "Fine-grained OCR (Color)",
            "Multi-crop OCR" or "Render Formatted OCR".
        ocr_type: OCR type forwarded to the model for the fine-grained tasks.
        ocr_box: Box argument forwarded for "Fine-grained OCR (Box)".
        ocr_color: Color argument forwarded for "Fine-grained OCR (Color)".

    Returns:
        ``(res, html_content)``: the model output and the rendered HTML as a
        string. ``html_content`` is ``None`` for plain-text OCR or when no
        render file is found.

    Raises:
        ValueError: If ``task`` is not one of the supported task names.
    """
    if task == "Plain Text OCR":
        res = model.chat(tokenizer, image, ocr_type='ocr')
        return res, None

    # Select the model entry point and the task-specific keyword arguments.
    if task in ("Format Text OCR", "Render Formatted OCR"):
        run_model, task_kwargs = model.chat, {"ocr_type": 'format'}
    elif task == "Fine-grained OCR (Box)":
        run_model, task_kwargs = model.chat, {"ocr_type": ocr_type, "ocr_box": ocr_box}
    elif task == "Fine-grained OCR (Color)":
        run_model, task_kwargs = model.chat, {"ocr_type": ocr_type, "ocr_color": ocr_color}
    elif task == "Multi-crop OCR":
        run_model, task_kwargs = model.chat_crop, {"ocr_type": 'format'}
    else:
        # Previously an unknown task fell through and failed later with an
        # UnboundLocalError on ``res``.
        raise ValueError(f"Unsupported OCR task: {task!r}")

    demo_html = os.path.join(results_folder, "demo.html")
    html_file = os.path.join(results_folder, f"{task.replace(' ', '_').lower()}.html")
    tikz_file = os.path.join(results_folder, "tikz.html")

    # demo.html is a fixed path reused by every call. Remove any render left
    # by an earlier request so it cannot be returned as this request's result
    # when the model writes nothing this time.
    # NOTE(review): assumes demo.html is only ever model output - confirm.
    try:
        os.remove(demo_html)
    except FileNotFoundError:
        pass

    res = run_model(tokenizer, image, render=True, save_render_file=demo_html, **task_kwargs)

    # First existing file wins, in the same priority order as before.
    html_content = None
    for render_path in (demo_html, html_file, tikz_file):
        if os.path.exists(render_path):
            with open(render_path, 'r') as f:
                html_content = f.read()
            break

    return res, html_content