File size: 4,668 Bytes
b9e018b
e651999
 
 
68e1313
 
 
 
b9e018b
e651999
 
 
 
03b6d75
 
 
 
 
 
 
68e1313
 
 
 
 
 
 
 
 
03b6d75
68e1313
 
e651999
 
03b6d75
 
 
 
 
 
 
68e1313
 
 
 
 
 
 
 
 
e651999
68e1313
 
b9e018b
 
 
 
e651999
44eb855
e651999
 
 
 
 
 
 
68e1313
03b6d75
68e1313
 
 
03b6d75
68e1313
 
 
 
 
 
 
 
 
 
 
 
 
 
e651999
2d2df69
68e1313
 
 
 
 
2d2df69
68e1313
 
 
 
 
e651999
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
import os
import gradio as gr
from Plan.AiLLM import llm_recognition
from Plan.pytesseractOCR import ocr_recognition
from Preprocess.preprocessImg import (
    preprocess_image001, preprocess_image002, preprocess_image003,
    preprocess_image004, preprocess_image005
)

def parse_tesseract_languages(listing):
    """Extract the language codes from ``tesseract --list-langs`` output.

    The first line of the listing is skipped (tesseract prints a header
    there). Remaining lines are stripped of surrounding whitespace and
    blank lines are dropped, so the result does not depend on whether the
    output ends with a newline or uses ``\\r\\n`` line endings.

    Args:
        listing: Raw text captured from the command's standard output.

    Returns:
        A list of language code strings; empty when nothing was listed.
    """
    return [line.strip() for line in listing.splitlines()[1:] if line.strip()]


# Collect every language reported by the installed tesseract executable.
# The ``with`` block makes sure the pipe to the child process is closed.
with os.popen('tesseract --list-langs') as _tesseract_pipe:
    languages = parse_tesseract_languages(_tesseract_pipe.read())


def preprocess_and_ocr(image, valid_type, language):
    """Run every preprocessing variant on the image and OCR each result.

    Each of the five preprocessing functions is applied to the same input
    image, and its output is immediately handed to ``ocr_recognition``
    together with ``valid_type`` and ``language``.

    Args:
        image: The uploaded image (the UI supplies it from a ``gr.Image``
            configured with ``type="pil"``).
        valid_type: The selected validation category.
        language: The selected language code.

    Returns:
        A 10-tuple: the five preprocessed images (variants one to five)
        followed by the five matching OCR results, in the same order.
    """
    variants = (
        preprocess_image001,
        preprocess_image002,
        preprocess_image003,
        preprocess_image004,
        preprocess_image005,
    )

    processed_images = []
    recognised = []
    for run_variant in variants:
        # Preprocess, then recognise, before moving on to the next variant.
        prepared = run_variant(image)
        processed_images.append(prepared)
        recognised.append(ocr_recognition(prepared, valid_type, language))

    return (*processed_images, *recognised)


def preprocess_and_llm(image, valid_type, language):
    """Run every preprocessing variant on the image and send each to the LLM.

    Each of the five preprocessing functions is applied to the same input
    image, and its output is immediately handed to ``llm_recognition``
    together with ``valid_type`` and ``language``.

    Args:
        image: The uploaded image (the UI supplies it from a ``gr.Image``
            configured with ``type="pil"``).
        valid_type: The selected validation category.
        language: The selected language code.

    Returns:
        A 10-tuple: the five preprocessed images (variants one to five)
        followed by the five matching LLM results, in the same order.
    """
    variants = (
        preprocess_image001,
        preprocess_image002,
        preprocess_image003,
        preprocess_image004,
        preprocess_image005,
    )

    processed_images = []
    answers = []
    for run_variant in variants:
        # Preprocess, then recognise, before moving on to the next variant.
        prepared = run_variant(image)
        processed_images.append(prepared)
        answers.append(llm_recognition(prepared, valid_type, language))

    return (*processed_images, *answers)


# Preselect "chi_tra" only when tesseract actually reported it; otherwise fall
# back to the first detected language, or to no preselection when the list is
# empty, so the dropdown never starts on a value missing from its choices.
if "chi_tra" in languages:
    default_language = "chi_tra"
elif languages:
    default_language = languages[0]
else:
    default_language = None

with gr.Blocks() as demo:
    # Inputs: the image to analyse, the validation category and the language.
    with gr.Row():
        image_input = gr.Image(type="pil", label="上傳圖片")
        validation_type = gr.Dropdown(choices=["純文字", "身分證正面", "身分證反面"], label="驗證類別")
        language_dropdown = gr.Dropdown(choices=languages, value=default_language, label="語言")

    # One trigger button per recognition backend.
    with gr.Row():
        ocr_button = gr.Button("使用 OCR")
        llm_button = gr.Button("使用 AI LLM")

    # One row per preprocessing variant: the preprocessed image, then the OCR
    # result, then the LLM result for that variant.
    with gr.Row():
        preprocess_output_001 = gr.Image(type="pil", label="預處理後的圖片-方案一")
        ocr_output_001 = gr.JSON(label="OCR-001-解析結果")
        llm_output_001 = gr.JSON(label="AiLLM-001-解析結果")
    with gr.Row():
        preprocess_output_002 = gr.Image(type="pil", label="預處理後的圖片-方案二")
        ocr_output_002 = gr.JSON(label="OCR-002-解析結果")
        llm_output_002 = gr.JSON(label="AiLLM-002-解析結果")

    with gr.Row():
        preprocess_output_003 = gr.Image(type="pil", label="預處理後的圖片-方案三")
        ocr_output_003 = gr.JSON(label="OCR-003-解析結果")
        llm_output_003 = gr.JSON(label="AiLLM-003-解析結果")
    with gr.Row():
        preprocess_output_004 = gr.Image(type="pil", label="預處理後的圖片-方案四")
        ocr_output_004 = gr.JSON(label="OCR-004-解析結果")
        llm_output_004 = gr.JSON(label="AiLLM-004-解析結果")
    with gr.Row():
        preprocess_output_005 = gr.Image(type="pil", label="預處理後的圖片-方案五")
        ocr_output_005 = gr.JSON(label="OCR-005-解析結果")
        llm_output_005 = gr.JSON(label="AiLLM-005-解析結果")

    # Both handlers return five preprocessed images followed by five results,
    # so the outputs list the five image components first and then the five
    # JSON components belonging to the clicked backend.
    ocr_button.click(preprocess_and_ocr, inputs=[image_input, validation_type, language_dropdown],
                     outputs=[
                         preprocess_output_001, preprocess_output_002, preprocess_output_003, preprocess_output_004,
                         preprocess_output_005,
                         ocr_output_001, ocr_output_002, ocr_output_003, ocr_output_004, ocr_output_005
                     ])
    llm_button.click(preprocess_and_llm, inputs=[image_input, validation_type, language_dropdown],
                     outputs=[
                         preprocess_output_001, preprocess_output_002, preprocess_output_003, preprocess_output_004,
                         preprocess_output_005,
                         llm_output_001, llm_output_002, llm_output_003, llm_output_004, llm_output_005
                     ])

# Start the Gradio app with public link sharing turned off.
demo.launch(share=False)