DocQA_Agent / app.py
OmidSakaki's picture
Update app.py
2d23095 verified
raw
history blame
3.75 kB
import gradio as gr
import time
import numpy as np
from PIL import Image
# Load the PaddleOCR engine once at import time. Both the import and the
# engine construction sit inside the try, so an ImportError raised by either
# step is replaced by the Persian installation hint below.
try:
    from paddleocr import PaddleOCR
    paddle_ocr = PaddleOCR(lang='fa') # cls parameter removed
except ImportError:
    raise ImportError("لطفا ابتدا paddlepaddle و paddleocr را نصب کنید: pip install paddlepaddle paddleocr")
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
# Initialize TrOCR: the processor and the encoder-decoder model are both
# loaded from the same "microsoft/trocr-base-printed" checkpoint name and are
# kept as module-level objects that run_trocr() reuses on every call.
trocr_processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-printed")
trocr_model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-printed")
def run_paddleocr(image):
    """Extract text from an image with the module-level PaddleOCR engine.

    Args:
        image: A PIL image or a NumPy array. PIL images are converted to a
            NumPy array before being passed to the engine.

    Returns:
        The recognized text fragments joined by single spaces, an empty
        string when nothing was recognized, or a Persian error message
        (prefixed with "خطا در PaddleOCR") if the engine raised.
    """
    if isinstance(image, Image.Image):
        image = np.array(image)
    try:
        result = paddle_ocr.ocr(image)  # called without the cls argument
        # Only the first entry is used (one input image). Guard it as well as
        # the outer list: a page with no detections would otherwise fail in
        # the join below and be reported as an error instead of empty text.
        if not result or not result[0]:
            return ''
        # NOTE(review): line[1][0] assumes each detection is laid out as
        # [box, (text, score)] - confirm against the installed PaddleOCR version.
        return ' '.join(line[1][0] for line in result[0])
    except Exception as e:
        # Best-effort: surface the failure as text so the UI still renders.
        return f"خطا در PaddleOCR: {str(e)}"
def run_trocr(image):
    """Recognize text in an image with the module-level TrOCR model.

    Args:
        image: A PIL image or a NumPy array. Arrays are wrapped into a PIL
            image before processing.

    Returns:
        The first decoded sequence produced by the model, or a Persian error
        message (prefixed with "خطا در TrOCR") if processing raised.
    """
    pil_image = Image.fromarray(image) if isinstance(image, np.ndarray) else image
    try:
        encoded = trocr_processor(pil_image, return_tensors="pt")
        token_ids = trocr_model.generate(encoded.pixel_values)
        decoded = trocr_processor.batch_decode(token_ids, skip_special_tokens=True)
        return decoded[0]
    except Exception as exc:
        # Best-effort: report the failure as text instead of raising.
        return f"خطا در TrOCR: {exc}"
def compare_models(image):
    """Run PaddleOCR and TrOCR on one image and summarize the results.

    Args:
        image: A PIL image, a NumPy array, or None (no image supplied).

    Returns:
        A 3-tuple ``(comparison_html, paddle_text, trocr_text)``:
        an HTML table with each model's extracted text and processing time
        in seconds, followed by the raw text returned by each model. When
        ``image`` is None, a short HTML notice and two empty strings are
        returned instead.
    """
    # Function-scope import so the block does not depend on a file-level import.
    from html import escape

    if image is None:
        # Nothing to process; avoid an AttributeError on image.convert below.
        notice = '<p style="text-align:center">لطفا ابتدا یک تصویر بارگذاری کنید.</p>'
        return notice, '', ''

    if isinstance(image, np.ndarray):
        image = Image.fromarray(image)
    image = image.convert("RGB")

    results = {}
    times = {}
    runners = (('PaddleOCR', run_paddleocr), ('TrOCR', run_trocr))
    for name, runner in runners:
        # perf_counter is monotonic, so the measured interval cannot be
        # distorted by wall-clock adjustments.
        start = time.perf_counter()
        results[name] = runner(image)
        times[name] = time.perf_counter() - start

    row_template = (
        '<tr>\n'
        '<td style="text-align:center">{name}</td>\n'
        '<td style="text-align:right; direction:rtl">{text}</td>\n'
        '<td style="text-align:center">{seconds:.3f}</td>\n'
        '</tr>\n'
    )
    # Escape the OCR output: it comes from image content and may contain
    # characters such as "<" or "&" that would otherwise be parsed as markup.
    rows = ''.join(
        row_template.format(
            name=name,
            text=escape(str(results[name])),
            seconds=times[name],
        )
        for name, _ in runners
    )
    comparison = (
        '\n<table style="width:100%">\n'
        '<tr>\n'
        '<th style="text-align:center">مدل</th>\n'
        '<th style="text-align:center">متن استخراج شده</th>\n'
        '<th style="text-align:center">زمان پردازش (ثانیه)</th>\n'
        '</tr>\n'
        f'{rows}'
        '</table>\n'
    )
    # The plain-text outputs stay unescaped; only the HTML view needs escaping.
    return comparison, results['PaddleOCR'], results['TrOCR']
# Create Gradio interface
# Layout: a heading, then one row with two columns - the image input and the
# submit button on one side, the three result widgets on the other.
with gr.Blocks(title="مقایسه مدل‌های OCR فارسی") as demo:
    # Introductory text shown at the top of the page.
    gr.Markdown("""
## مقایسه عملکرد مدل‌های OCR برای زبان فارسی
این برنامه دو مدل مختلف OCR را روی تصاویر فارسی مقایسه می‌کند:
1. PaddleOCR
2. TrOCR (مایکروسافت)
""")
    with gr.Row():
        with gr.Column():
            # type="pil" requests the uploaded image as a PIL image.
            image_input = gr.Image(label="تصویر ورودی", type="pil")
            submit_btn = gr.Button("مقایسه مدل‌ها", variant="primary")
        with gr.Column():
            # These three widgets receive compare_models' return values.
            comparison_output = gr.HTML(label="نتایج مقایسه")
            paddle_output = gr.Textbox(label="PaddleOCR")
            trocr_output = gr.Textbox(label="TrOCR")
    # The order of `outputs` matches the order of the tuple returned by
    # compare_models: (comparison HTML, PaddleOCR text, TrOCR text).
    submit_btn.click(
        fn=compare_models,
        inputs=image_input,
        outputs=[comparison_output, paddle_output, trocr_output]
    )
# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()