Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import time | |
| from PIL import Image | |
| from paddleocr import PaddleOCR | |
| from transformers import TrOCRProcessor, VisionEncoderDecoderModel | |
| import pytesseract | |
| import numpy as np | |
# Build every OCR backend once at import time; the run_* helpers below reuse
# these module-level objects instead of re-creating them per call.
paddle_ocr = PaddleOCR(
    use_textline_orientation=True,
    lang='fa',
)

# The TrOCR processor and model are both loaded from the same checkpoint.
trocr_processor = TrOCRProcessor.from_pretrained(
    pretrained_model_name_or_path="microsoft/trocr-base-printed",
)
trocr_model = VisionEncoderDecoderModel.from_pretrained(
    pretrained_model_name_or_path="microsoft/trocr-base-printed",
)
def run_paddleocr(image):
    """Run PaddleOCR on an image and return the recognized text.

    The image is saved as a JPEG inside a private temporary directory and
    that path is handed to the module-level ``paddle_ocr`` engine. Using a
    per-call directory (instead of a fixed ``temp.jpg`` in the working
    directory) keeps concurrent requests from overwriting each other's input
    and guarantees the file is removed afterwards.

    Args:
        image: Object exposing ``save(path)`` (a PIL image in this app).

    Returns:
        The recognized text fragments joined with single spaces, or an empty
        string when the engine reports no text.
    """
    import os
    import tempfile

    with tempfile.TemporaryDirectory() as tmp_dir:
        image_path = os.path.join(tmp_dir, "temp.jpg")
        image.save(image_path)
        # Run OCR while the temporary file still exists.
        result = paddle_ocr.ocr(image_path, cls=True)

    # The first (only) page entry can itself be None or empty when no text is
    # detected, so guard it as well as the outer list before iterating.
    if not result or not result[0]:
        return ''

    # Each entry is indexed as line[1][0] to pull out the recognized string.
    return ' '.join(line[1][0] for line in result[0])
def run_trocr(image):
    """Run the TrOCR model on an image and return the decoded text.

    Args:
        image: Image accepted by the module-level ``trocr_processor``.

    Returns:
        The first decoded sequence, with special tokens stripped.
    """
    # Preprocess into PyTorch tensors ("pt") and keep only the pixel values.
    encoded = trocr_processor(image, return_tensors="pt")
    token_ids = trocr_model.generate(encoded.pixel_values)

    # batch_decode returns one string per sequence; only the first is used.
    decoded = trocr_processor.batch_decode(token_ids, skip_special_tokens=True)
    return decoded[0]
def run_tesseract(image):
    """Run Tesseract on an image and return the extracted text.

    Args:
        image: Image passed straight through to ``pytesseract``.

    Returns:
        Whatever ``pytesseract.image_to_string`` produces for the ``'fas'``
        language setting.
    """
    extracted = pytesseract.image_to_string(image, lang='fas')
    return extracted
def compare_models(image):
    """Run all three OCR engines on one image and report text and timing.

    Args:
        image: A PIL image or a NumPy array. ``None`` (for example when the
            button is pressed before an image is chosen) is rejected with a
            user-visible error instead of crashing on ``.convert``.

    Returns:
        A 4-tuple ``(comparison_html, paddle_text, trocr_text,
        tesseract_text)``. ``comparison_html`` is an HTML table with one row
        per engine holding its name, its HTML-escaped text and the elapsed
        time in seconds (two decimals). The three text values are the raw,
        unescaped engine outputs.

    Raises:
        gr.Error: If ``image`` is ``None``.
    """
    import html

    if image is None:
        raise gr.Error("لطفاً ابتدا یک تصویر بارگذاری کنید.")

    # Normalize the input to an RGB PIL image for every engine.
    if isinstance(image, np.ndarray):
        image = Image.fromarray(image)
    image = image.convert("RGB")

    engines = (
        ('PaddleOCR', run_paddleocr),
        ('TrOCR', run_trocr),
        ('Tesseract', run_tesseract),
    )

    results = {}
    timings = {}
    for name, runner in engines:
        # perf_counter is monotonic, so the measured duration cannot be
        # skewed by system clock adjustments the way time.time() can.
        start = time.perf_counter()
        results[name] = runner(image)
        timings[name] = time.perf_counter() - start

    # OCR output is arbitrary text; escape it so characters such as '<' or
    # '&' are displayed literally instead of being parsed as markup.
    rows = ''.join(
        f"""
<tr>
<td>{name}</td>
<td>{html.escape(results[name])}</td>
<td>{timings[name]:.2f}</td>
</tr>"""
        for name, _ in engines
    )

    comparison = f"""
<table>
<tr>
<th>مدل</th>
<th>متن استخراج شده</th>
<th>زمان پردازش (ثانیه)</th>
</tr>{rows}
</table>
"""
    return comparison, results['PaddleOCR'], results['TrOCR'], results['Tesseract']
# Gradio UI: the first column takes the image and the submit button, the
# second column shows the comparison table and each engine's text.
with gr.Blocks(title="مقایسه مدلهای OCR فارسی") as demo:
    gr.Markdown("""
    ## مقایسه عملکرد مدلهای OCR برای زبان فارسی
    این برنامه سه مدل مختلف OCR را روی تصاویر فارسی مقایسه میکند:
    1. PaddleOCR
    2. TrOCR (مایکروسافت)
    3. Tesseract OCR
    """)

    with gr.Row():
        # Input side
        with gr.Column():
            image_input = gr.Image(label="تصویر ورودی", type="pil")
            submit_btn = gr.Button("مقایسه مدلها")

        # Output side
        with gr.Column():
            comparison_output = gr.HTML(label="نتایج مقایسه")
            paddle_output = gr.Textbox(label="PaddleOCR")
            trocr_output = gr.Textbox(label="TrOCR")
            tesseract_output = gr.Textbox(label="Tesseract")

    # The output order matches the 4-tuple returned by compare_models.
    submit_btn.click(
        compare_models,
        image_input,
        [comparison_output, paddle_output, trocr_output, tesseract_output],
    )

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()