# Hugging Face Space (status when captured: Sleeping)
import html
import os
import tempfile
import time

import gradio as gr
import numpy as np
import pytesseract
from paddleocr import PaddleOCR
from PIL import Image
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
# Module-level OCR engines: created once when the module is imported and
# reused by the run_* helpers. Tesseract needs no object here; it is called
# through the pytesseract module.
_TROCR_CHECKPOINT = "microsoft/trocr-base-printed"

paddle_ocr = PaddleOCR(lang='fa', use_textline_orientation=True)
trocr_processor = TrOCRProcessor.from_pretrained(_TROCR_CHECKPOINT)
trocr_model = VisionEncoderDecoderModel.from_pretrained(_TROCR_CHECKPOINT)
def run_paddleocr(image):
    """Run PaddleOCR on a PIL image and return the recognized text.

    The image is written to a uniquely named temporary JPEG file, handed to
    the module-level ``paddle_ocr`` engine by path, and the file is removed
    again once recognition has finished (or failed).

    Args:
        image: A PIL image. It is converted to RGB before saving so that
            modes JPEG cannot store (e.g. with an alpha channel) do not
            make the save fail.

    Returns:
        The text of every detected line joined with single spaces, or an
        empty string when nothing was detected.
    """
    # A unique temp file (rather than a fixed "temp.jpg" in the working
    # directory) keeps concurrent requests from overwriting each other's
    # input and does not leave stale files behind.
    handle, image_path = tempfile.mkstemp(suffix=".jpg")
    os.close(handle)
    try:
        image.convert("RGB").save(image_path)
        # NOTE(review): `cls=` and the `[box, (text, score)]` result layout
        # read below look like the PaddleOCR 2.x `ocr()` API, while the
        # engine is constructed with `use_textline_orientation`, which looks
        # like a 3.x option -- confirm against the installed version.
        result = paddle_ocr.ocr(image_path, cls=True)
    finally:
        os.remove(image_path)

    # The per-image entry may be None or empty when no text is found; a
    # list such as [None] is still truthy, so check the entry itself.
    if not result or not result[0]:
        return ''
    return ' '.join(line[1][0] for line in result[0])
def run_trocr(image):
    """Run TrOCR on an image and return the decoded text.

    The image is preprocessed into PyTorch tensors by ``trocr_processor``,
    token ids are generated by ``trocr_model``, and the first decoded string
    of the batch is returned with special tokens stripped.
    """
    encoded = trocr_processor(image, return_tensors="pt")
    token_ids = trocr_model.generate(encoded.pixel_values)
    decoded_batch = trocr_processor.batch_decode(
        token_ids,
        skip_special_tokens=True,
    )
    return decoded_batch[0]
def run_tesseract(image):
    """Return the text pytesseract extracts from ``image`` with ``lang='fas'``."""
    recognized_text = pytesseract.image_to_string(image, lang='fas')
    return recognized_text
def _timed(ocr_fn, image):
    """Call ``ocr_fn(image)`` and return ``(text, elapsed_seconds)``."""
    started = time.perf_counter()
    text = ocr_fn(image)
    return text, time.perf_counter() - started


def _build_comparison_table(rows):
    """Render ``(model_name, text, seconds)`` rows as an HTML table string."""
    lines = [
        "",
        "<table>",
        "<tr>",
        "<th>مدل</th>",
        "<th>متن استخراج شده</th>",
        "<th>زمان پردازش (ثانیه)</th>",
        "</tr>",
    ]
    for name, text, seconds in rows:
        lines += [
            "<tr>",
            # OCR output comes from arbitrary user images, so it is escaped
            # before being placed in markup.
            f"<td>{html.escape(name)}</td>",
            f"<td>{html.escape(text)}</td>",
            f"<td>{seconds:.2f}</td>",
            "</tr>",
        ]
    lines += ["</table>", ""]
    return "\n".join(lines)


def compare_models(image):
    """Run all three OCR engines on one image and compare their output.

    Args:
        image: A PIL image, a numpy array (converted with
            ``Image.fromarray``), or ``None`` when no image was provided.

    Returns:
        A 4-tuple ``(comparison_html, paddle_text, trocr_text,
        tesseract_text)``. ``comparison_html`` is an HTML table with one row
        per engine showing the extracted text and the processing time in
        seconds. When ``image`` is ``None`` the HTML is a short message and
        the three texts are empty strings.
    """
    # Nothing to process yet: return a message instead of crashing on
    # ``None.convert``.
    if image is None:
        return "<p>لطفاً ابتدا یک تصویر بارگذاری کنید.</p>", '', '', ''

    # Convert to RGB if needed
    if isinstance(image, np.ndarray):
        image = Image.fromarray(image)
    image = image.convert("RGB")

    engines = (
        ('PaddleOCR', run_paddleocr),
        ('TrOCR', run_trocr),
        ('Tesseract', run_tesseract),
    )

    results = {}
    rows = []
    for name, ocr_fn in engines:
        text, elapsed = _timed(ocr_fn, image)
        results[name] = text
        rows.append((name, text, elapsed))

    comparison = _build_comparison_table(rows)
    return comparison, results['PaddleOCR'], results['TrOCR'], results['Tesseract']
# Gradio front end: a row with two columns -- the image input and the trigger
# button in the first, the comparison table and the three per-engine text
# boxes in the second. Clicking the button calls compare_models.
_intro_markdown = """
## مقایسه عملکرد مدلهای OCR برای زبان فارسی
این برنامه سه مدل مختلف OCR را روی تصاویر فارسی مقایسه میکند:
1. PaddleOCR
2. TrOCR (مایکروسافت)
3. Tesseract OCR
"""

with gr.Blocks(title="مقایسه مدلهای OCR فارسی") as demo:
    gr.Markdown(_intro_markdown)

    with gr.Row():
        with gr.Column():
            image_input = gr.Image(label="تصویر ورودی", type="pil")
            submit_btn = gr.Button("مقایسه مدلها")
        with gr.Column():
            comparison_output = gr.HTML(label="نتایج مقایسه")
            paddle_output = gr.Textbox(label="PaddleOCR")
            trocr_output = gr.Textbox(label="TrOCR")
            tesseract_output = gr.Textbox(label="Tesseract")

    # Order matches the 4-tuple returned by compare_models.
    result_components = [
        comparison_output,
        paddle_output,
        trocr_output,
        tesseract_output,
    ]
    submit_btn.click(
        fn=compare_models,
        inputs=image_input,
        outputs=result_components,
    )


if __name__ == "__main__":
    demo.launch()