# DocQA_Agent / app.py
# Committed by OmidSakaki: "Update app.py" (24f0403, verified); file size 3.54 kB.
import gradio as gr
import time
import numpy as np
from PIL import Image
from paddleocr import PaddleOCR
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
# Build both OCR engines once at import time; the request handlers below
# reuse these module-level objects instead of reloading weights per call.
_TROCR_CHECKPOINT = "microsoft/trocr-base-printed"

paddle_ocr = PaddleOCR(use_textline_orientation=True, lang='fa')
trocr_processor = TrOCRProcessor.from_pretrained(_TROCR_CHECKPOINT)
trocr_model = VisionEncoderDecoderModel.from_pretrained(_TROCR_CHECKPOINT)
def run_paddleocr(image):
    """Run PaddleOCR on *image* and return the recognized text as one string.

    Args:
        image: A PIL image or a NumPy array. PIL images are converted to a
            NumPy array before being handed to the OCR engine.

    Returns:
        The recognized text of every detected line, joined with single
        spaces, or an empty string when nothing was recognized.
    """
    # Convert to numpy array if needed
    if isinstance(image, Image.Image):
        image = np.array(image)

    # NOTE(review): `cls=True` is the older call style, while the constructor
    # in this file uses the newer `use_textline_orientation` name -- confirm
    # the installed PaddleOCR version accepts both.
    result = paddle_ocr.ocr(image, cls=True)

    # The outer list holds one entry per input image. That entry can be None
    # (or empty) when no text was detected, so it must be checked as well as
    # the outer list before iterating.
    if not result or not result[0]:
        return ''

    # Each line is indexed as line[1][0] for its recognized text.
    return ' '.join(line[1][0] for line in result[0])
def run_trocr(image):
    """Recognize the text in *image* with TrOCR and return the decoded string.

    Accepts either a PIL image or a NumPy array; arrays are wrapped in a PIL
    image before preprocessing.
    """
    pil_image = Image.fromarray(image) if isinstance(image, np.ndarray) else image

    # Preprocess into PyTorch tensors, generate token ids, then decode them.
    encoded = trocr_processor(pil_image, return_tensors="pt")
    token_ids = trocr_model.generate(encoded.pixel_values)
    decoded = trocr_processor.batch_decode(token_ids, skip_special_tokens=True)

    # A single image was submitted, so only the first decoded string is used.
    return decoded[0]
def compare_models(image):
    """Run PaddleOCR and TrOCR on *image* and build a side-by-side comparison.

    Args:
        image: A PIL image or a NumPy array, or ``None`` when no image was
            supplied.

    Returns:
        A 3-tuple ``(comparison_html, paddle_text, trocr_text)``.
        ``comparison_html`` is an HTML table with one row per model showing
        the model name, the extracted text and the processing time in
        seconds (three decimals). The two text items are the raw,
        unescaped OCR outputs. When *image* is ``None`` the HTML item is a
        short "please upload an image first" notice and both texts are empty.
    """
    # Local import keeps this function self-contained (standard library only).
    from html import escape

    # An empty image input arrives as None; return a notice instead of
    # failing on `None.convert(...)` below.
    if image is None:
        notice = '<p style="text-align:center">لطفاً ابتدا یک تصویر بارگذاری کنید.</p>'
        return notice, '', ''

    # Convert to RGB if needed
    if isinstance(image, np.ndarray):
        image = Image.fromarray(image)
    image = image.convert("RGB")

    results = {}
    times = {}
    for name, runner in (('PaddleOCR', run_paddleocr), ('TrOCR', run_trocr)):
        # perf_counter is monotonic, so the measured duration cannot go
        # negative or jump if the system clock is adjusted mid-run.
        start = time.perf_counter()
        results[name] = runner(image)
        times[name] = time.perf_counter() - start

    # OCR output is arbitrary text derived from the uploaded image; escape it
    # so characters such as '<' or '&' are displayed literally rather than
    # being interpreted as markup.
    paddle_cell = escape(results['PaddleOCR'])
    trocr_cell = escape(results['TrOCR'])

    # Create comparison table (headers: model, extracted text, processing
    # time in seconds).
    comparison = f"""
<table style="width:100%">
<tr>
<th style="text-align:center">مدل</th>
<th style="text-align:center">متن استخراج شده</th>
<th style="text-align:center">زمان پردازش (ثانیه)</th>
</tr>
<tr>
<td style="text-align:center">PaddleOCR</td>
<td style="text-align:right; direction:rtl">{paddle_cell}</td>
<td style="text-align:center">{times['PaddleOCR']:.3f}</td>
</tr>
<tr>
<td style="text-align:center">TrOCR</td>
<td style="text-align:right; direction:rtl">{trocr_cell}</td>
<td style="text-align:center">{times['TrOCR']:.3f}</td>
</tr>
</table>
"""
    # The textboxes receive the raw text; only the HTML view needs escaping.
    return comparison, results['PaddleOCR'], results['TrOCR']
# Create Gradio interface
# The window title reads "Comparison of Persian OCR models".
with gr.Blocks(title="مقایسه مدل‌های OCR فارسی") as demo:
    # Introductory text (Persian): states that the app compares two OCR
    # models, PaddleOCR and TrOCR (Microsoft), on Persian images.
    gr.Markdown("""
## مقایسه عملکرد مدل‌های OCR برای زبان فارسی
این برنامه دو مدل مختلف OCR را روی تصاویر فارسی مقایسه می‌کند:
1. PaddleOCR
2. TrOCR (مایکروسافت)
""")
    with gr.Row():
        # First column: the image input ("Input image") and the submit
        # button ("Compare models"). type="pil" requests a PIL image.
        with gr.Column():
            image_input = gr.Image(label="تصویر ورودی", type="pil")
            submit_btn = gr.Button("مقایسه مدل‌ها", variant="primary")
        # Second column: the HTML comparison ("Comparison results") followed
        # by one textbox per model.
        with gr.Column():
            comparison_output = gr.HTML(label="نتایج مقایسه")
            paddle_output = gr.Textbox(label="PaddleOCR")
            trocr_output = gr.Textbox(label="TrOCR")
    # Wire the button to compare_models; the three outputs are listed in the
    # same order as the tuple that compare_models returns.
    submit_btn.click(
        fn=compare_models,
        inputs=image_input,
        outputs=[comparison_output, paddle_output, trocr_output]
    )
# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()