Spaces:
Sleeping
Sleeping
File size: 3,537 Bytes
9453eac 2bf547d 24f0403 9453eac 2bf547d 9453eac 2bf547d 9453eac 2bf547d 24f0403 2bf547d 9453eac 2bf547d 24f0403 2bf547d 24f0403 2bf547d 24f0403 2bf547d 24f0403 2bf547d 24f0403 2bf547d 24f0403 2bf547d 24f0403 2bf547d 24f0403 2bf547d 24f0403 2bf547d 24f0403 9453eac 2bf547d 24f0403 2bf547d 9453eac 2bf547d 24f0403 279ab91 2bf547d 9453eac 24f0403 9453eac 2bf547d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 |
import html
import time

import gradio as gr
import numpy as np
from paddleocr import PaddleOCR
from PIL import Image
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
# Load both OCR engines once at import time so every request reuses them.
# NOTE(review): confirm the TrOCR checkpoint below can read Persian script;
# the PaddleOCR engine is explicitly configured with lang='fa'.
_TROCR_CHECKPOINT = "microsoft/trocr-base-printed"

paddle_ocr = PaddleOCR(lang='fa', use_textline_orientation=True)
trocr_processor = TrOCRProcessor.from_pretrained(_TROCR_CHECKPOINT)
trocr_model = VisionEncoderDecoderModel.from_pretrained(_TROCR_CHECKPOINT)
def run_paddleocr(image):
    """Run PaddleOCR on an image and return the recognized text.

    Args:
        image: A PIL image or a numpy array. PIL images are converted to a
            numpy array before being handed to PaddleOCR.

    Returns:
        The recognized text lines joined by single spaces, or an empty
        string when nothing was recognized.
    """
    if isinstance(image, Image.Image):
        image = np.array(image)

    try:
        result = paddle_ocr.ocr(image, cls=True)
    except TypeError:
        # NOTE(review): newer PaddleOCR releases (the ones that take the
        # ``use_textline_orientation`` constructor argument) reject the
        # legacy ``cls`` keyword; retry without it. Confirm against the
        # installed version.
        result = paddle_ocr.ocr(image)

    if not result:
        return ''

    page = result[0]
    if not page:
        # Some PaddleOCR releases return ``[None]`` when no text is detected;
        # iterating that entry would raise TypeError.
        return ''

    if isinstance(page, dict):
        # Newer result layout: one mapping per image with the recognized
        # strings under ``rec_texts`` (assumed from PaddleOCR 3.x docs).
        texts = page.get('rec_texts') or []
    else:
        # Legacy layout: each entry is ``[box, (text, confidence)]``.
        texts = [line[1][0] for line in page]
    return ' '.join(texts)
def run_trocr(image):
    """Recognize text in ``image`` with TrOCR and return the decoded string.

    Args:
        image: A PIL image or a numpy array; arrays are wrapped in a PIL
            image before preprocessing.

    Returns:
        The first decoded sequence, with special tokens stripped.
    """
    pil_image = Image.fromarray(image) if isinstance(image, np.ndarray) else image
    encoded = trocr_processor(pil_image, return_tensors="pt")
    token_ids = trocr_model.generate(encoded.pixel_values)
    decoded = trocr_processor.batch_decode(token_ids, skip_special_tokens=True)
    return decoded[0]
def compare_models(image):
    """Run PaddleOCR and TrOCR on the same image and compare their output.

    Args:
        image: A PIL image, a numpy array, or ``None`` when no image is
            available.

    Returns:
        A 3-tuple ``(comparison_html, paddle_text, trocr_text)``: an HTML
        table holding each model's text and elapsed seconds, followed by the
        raw text from PaddleOCR and from TrOCR. All three are empty strings
        when ``image`` is ``None``.
    """
    # Nothing to process; return empty outputs instead of failing on
    # ``None.convert``.
    if image is None:
        return "", "", ""

    if isinstance(image, np.ndarray):
        image = Image.fromarray(image)
    image = image.convert("RGB")

    results = {}
    times = {}
    for name, run_model in (("PaddleOCR", run_paddleocr), ("TrOCR", run_trocr)):
        # perf_counter is monotonic, so the interval cannot be skewed by
        # wall-clock adjustments the way time.time() can.
        start = time.perf_counter()
        results[name] = run_model(image)
        times[name] = time.perf_counter() - start

    # OCR output is arbitrary text taken from a user-supplied image; escape
    # it so characters such as '<' or '&' cannot break or inject markup.
    paddle_cell = html.escape(results["PaddleOCR"])
    trocr_cell = html.escape(results["TrOCR"])

    comparison = f"""
<table style="width:100%">
<tr>
<th style="text-align:center">مدل</th>
<th style="text-align:center">متن استخراج شده</th>
<th style="text-align:center">زمان پردازش (ثانیه)</th>
</tr>
<tr>
<td style="text-align:center">PaddleOCR</td>
<td style="text-align:right; direction:rtl">{paddle_cell}</td>
<td style="text-align:center">{times["PaddleOCR"]:.3f}</td>
</tr>
<tr>
<td style="text-align:center">TrOCR</td>
<td style="text-align:right; direction:rtl">{trocr_cell}</td>
<td style="text-align:center">{times["TrOCR"]:.3f}</td>
</tr>
</table>
"""
    # The text boxes receive the unescaped text; only the HTML view needs
    # escaping.
    return comparison, results["PaddleOCR"], results["TrOCR"]
# Assemble the Gradio UI: the first column holds the image input and the
# submit button, the second column holds the three result widgets.
with gr.Blocks(title="مقایسه مدلهای OCR فارسی") as demo:
    # The Markdown text is kept flush-left so its content is unchanged.
    gr.Markdown("""
## مقایسه عملکرد مدلهای OCR برای زبان فارسی
این برنامه دو مدل مختلف OCR را روی تصاویر فارسی مقایسه میکند:
1. PaddleOCR
2. TrOCR (مایکروسافت)
""")

    with gr.Row():
        with gr.Column():
            image_input = gr.Image(label="تصویر ورودی", type="pil")
            submit_btn = gr.Button("مقایسه مدلها", variant="primary")

        with gr.Column():
            comparison_output = gr.HTML(label="نتایج مقایسه")
            paddle_output = gr.Textbox(label="PaddleOCR")
            trocr_output = gr.Textbox(label="TrOCR")

    # Clicking the button runs both models and fills all three outputs.
    submit_btn.click(
        compare_models,
        inputs=image_input,
        outputs=[comparison_output, paddle_output, trocr_output],
    )
# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()