Spaces:
Sleeping
Sleeping
File size: 3,751 Bytes
9453eac 2bf547d 24f0403 9453eac dd4c7df 2d23095 dd4c7df 2bf547d 9453eac dd4c7df 2bf547d 9453eac 2bf547d 24f0403 dd4c7df 2d23095 dd4c7df 9453eac 2bf547d 24f0403 dd4c7df 2bf547d 24f0403 2bf547d 24f0403 2bf547d 24f0403 2bf547d 24f0403 2bf547d 24f0403 2bf547d 24f0403 2bf547d 24f0403 2bf547d 24f0403 2bf547d 24f0403 9453eac 2bf547d 24f0403 2bf547d 9453eac 2bf547d 24f0403 279ab91 2bf547d 9453eac 24f0403 9453eac 2bf547d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 |
import gradio as gr
import time
import numpy as np
from PIL import Image
# Build the shared PaddleOCR engine (Persian language pack) once at import
# time; fail early with an installation hint when the package is missing.
try:
    from paddleocr import PaddleOCR

    paddle_ocr = PaddleOCR(lang='fa')  # the `cls` parameter has been removed
except ImportError as err:
    # Chain the original error explicitly so the traceback still shows which
    # package actually failed to import.
    raise ImportError("لطفا ابتدا paddlepaddle و paddleocr را نصب کنید: pip install paddlepaddle paddleocr") from err
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
# Load the TrOCR processor and model once at import time. Both are restored
# from the same pretrained checkpoint, so the identifier is named only once.
_TROCR_CHECKPOINT = "microsoft/trocr-base-printed"
trocr_processor = TrOCRProcessor.from_pretrained(_TROCR_CHECKPOINT)
trocr_model = VisionEncoderDecoderModel.from_pretrained(_TROCR_CHECKPOINT)
def run_paddleocr(image):
    """Run PaddleOCR on an image and return the recognized text.

    Args:
        image: A PIL image or a NumPy array. PIL images are converted to a
            NumPy array before being passed to the module-level
            ``paddle_ocr`` engine.

    Returns:
        The recognized text fragments joined by single spaces, ``''`` when
        nothing was recognized, or an error message prefixed with
        ``خطا در PaddleOCR`` when recognition raises.
    """
    if isinstance(image, Image.Image):
        image = np.array(image)
    try:
        result = paddle_ocr.ocr(image)  # the `cls` parameter has been removed
        # The engine may return an empty list, or a first page that is None
        # or empty, when no text is found; treat all of these as "no text"
        # instead of letting the iteration below fail.
        if not result or not result[0]:
            return ''
        page = result[0]
        if isinstance(page, dict):
            # NOTE(review): newer PaddleOCR releases appear to return a
            # dict-like result per page with the texts under "rec_texts";
            # confirm against the installed version.
            texts = page.get("rec_texts") or []
        else:
            # Legacy layout: each line is [box, (text, confidence)].
            texts = [line[1][0] for line in page]
        return ' '.join(texts)
    except Exception as e:
        # Best-effort by design: the UI shows the error text instead of crashing.
        return f"خطا در PaddleOCR: {str(e)}"
def run_trocr(image):
    """Run TrOCR on an image and return the decoded text.

    Args:
        image: A PIL image or a NumPy array. Arrays are converted to a PIL
            image first.

    Returns:
        The first decoded sequence produced by the module-level
        ``trocr_model``, or an error message prefixed with ``خطا در TrOCR``
        when any step of the pipeline raises.
    """
    if isinstance(image, np.ndarray):
        image = Image.fromarray(image)
    try:
        # Normalize the colour mode so grayscale / RGBA inputs behave the same
        # as the RGB images passed in by compare_models.
        # NOTE(review): assumes the TrOCR processor wants 3-channel input — confirm.
        image = image.convert("RGB")
        pixel_values = trocr_processor(image, return_tensors="pt").pixel_values
        generated_ids = trocr_model.generate(pixel_values)
        return trocr_processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
    except Exception as e:
        # Best-effort by design: the UI shows the error text instead of crashing.
        return f"خطا در TrOCR: {str(e)}"
def compare_models(image):
    """Run PaddleOCR and TrOCR on the same image and compare text and timing.

    Args:
        image: A PIL image or a NumPy array. ``None`` (presumably what the
            Gradio image input passes when nothing was uploaded) is rejected.

    Returns:
        A tuple ``(comparison_html, paddle_text, trocr_text)``: an HTML table
        with each model's text and elapsed seconds, followed by the raw text
        returned by ``run_paddleocr`` and ``run_trocr``.

    Raises:
        gr.Error: If ``image`` is ``None``.
    """
    from html import escape  # local import keeps this block self-contained

    if image is None:
        # Surface a readable message in the UI instead of an AttributeError.
        raise gr.Error("لطفا ابتدا یک تصویر بارگذاری کنید")
    if isinstance(image, np.ndarray):
        image = Image.fromarray(image)
    image = image.convert("RGB")

    results = {}
    times = {}
    # Same order as before: PaddleOCR first, then TrOCR. perf_counter is a
    # monotonic clock, so the measured interval cannot be skewed by
    # wall-clock adjustments.
    for name, runner in (('PaddleOCR', run_paddleocr), ('TrOCR', run_trocr)):
        start = time.perf_counter()
        results[name] = runner(image)
        times[name] = time.perf_counter() - start

    # The recognized text comes from an arbitrary image, so it is escaped
    # before being embedded in markup; the text boxes still get the raw text.
    comparison = f"""
<table style="width:100%">
<tr>
<th style="text-align:center">مدل</th>
<th style="text-align:center">متن استخراج شده</th>
<th style="text-align:center">زمان پردازش (ثانیه)</th>
</tr>
<tr>
<td style="text-align:center">PaddleOCR</td>
<td style="text-align:right; direction:rtl">{escape(results['PaddleOCR'])}</td>
<td style="text-align:center">{times['PaddleOCR']:.3f}</td>
</tr>
<tr>
<td style="text-align:center">TrOCR</td>
<td style="text-align:right; direction:rtl">{escape(results['TrOCR'])}</td>
<td style="text-align:center">{times['TrOCR']:.3f}</td>
</tr>
</table>
"""
    return comparison, results['PaddleOCR'], results['TrOCR']
# Build the Gradio interface: image input and trigger button in the left
# column, comparison table and per-model text boxes in the right column.
with gr.Blocks(title="مقایسه مدلهای OCR فارسی") as demo:
    gr.Markdown("""
## مقایسه عملکرد مدلهای OCR برای زبان فارسی
این برنامه دو مدل مختلف OCR را روی تصاویر فارسی مقایسه میکند:
1. PaddleOCR
2. TrOCR (مایکروسافت)
""")
    with gr.Row():
        with gr.Column():
            image_input = gr.Image(label="تصویر ورودی", type="pil")
            submit_btn = gr.Button("مقایسه مدلها", variant="primary")
        with gr.Column():
            comparison_output = gr.HTML(label="نتایج مقایسه")
            paddle_output = gr.Textbox(label="PaddleOCR")
            trocr_output = gr.Textbox(label="TrOCR")
    # The event wiring is kept inside the Blocks context where the
    # components are defined.
    submit_btn.click(
        fn=compare_models,
        inputs=image_input,
        outputs=[comparison_output, paddle_output, trocr_output]
    )

# Start the app only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()