Spaces:

OmidSakaki
/

DocQA_Agent

Sleeping

File size: 2,558 Bytes

9453eac
5c3f634
4abc449
c5a772e
9453eac
c5a772e
 
 
54a29b3
c5a772e
 
6ecc4f4
 
c5a772e
 
768d260
c5a772e
 
 
6ecc4f4
 
 
c5a772e
6ecc4f4
c5a772e
 
 
 
 
 
6ecc4f4
c5a772e
 
 
 
 
54a29b3
 
c5a772e
 
 
54a29b3
c5a772e
54a29b3
 
c5a772e
9453eac
 
c5a772e
 
279ab91
6ecc4f4
c5a772e
54a29b3
 
a3b6ec8
 
c5a772e
6ecc4f4
 
9453eac
 
4abc449

import gradio as gr
import easyocr
import numpy as np
from typing import Tuple

class OCRProcessor:
    """Wrapper around an EasyOCR reader created for the ``'fa'`` language."""

    def __init__(self):
        # The reader is created once per instance and reused by extract_text.
        self.reader = easyocr.Reader(['fa'])

    def extract_text(self, image: np.ndarray) -> str:
        """Run OCR on *image* and return the recognised text.

        Args:
            image: Image to read; passed unchanged to ``reader.readtext``.

        Returns:
            The strings returned by the reader joined with newlines, or
            ``""`` when the reader returns nothing.

        Raises:
            RuntimeError: If *image* is ``None`` or if reading fails. When
                the reader fails, the original exception is attached as
                ``__cause__``.
        """
        # Reject a missing image up front with a clear message instead of
        # surfacing whatever error the reader would raise for None.
        if image is None:
            raise RuntimeError("خطا در پردازش OCR: تصویر ورودی خالی است")
        try:
            # detail=0 / paragraph=True: request plain text strings grouped
            # into paragraphs (per the EasyOCR readtext options).
            results = self.reader.readtext(image, detail=0, paragraph=True)
            return "\n".join(results) if results else ""
        except Exception as e:
            # Chain explicitly so the underlying failure stays attached.
            raise RuntimeError(f"خطا در پردازش OCR: {e}") from e

class TextPostProcessor:
    """Normalise OCR output using a fixed character replacement table.

    The table rewrites two letter variants and maps ten non-ASCII digit
    characters to the ASCII digits ``0``-``9``.
    """

    def __init__(self):
        # Letter variants to unify (presumably Arabic yeh/kaf -> Persian forms).
        letter_variants = {'ي': 'ی', 'ك': 'ک'}
        # Digit characters mapped to their ASCII equivalents.
        digit_map = {
            '۰': '0',
            '۱': '1',
            '۲': '2',
            '۳': '3',
            '۴': '4',
            '۵': '5',
            '۶': '6',
            '۷': '7',
            '۸': '8',
            '۹': '9',
        }
        # Single lookup table applied, in insertion order, by preprocess().
        self.replacements = {**letter_variants, **digit_map}

    def preprocess(self, text: str) -> str:
        """Return *text* with replacements applied and whitespace collapsed.

        Args:
            text: Text to normalise. Any falsy value yields ``""``.

        Returns:
            The text after every entry of ``self.replacements`` has been
            applied, with each run of whitespace (newlines included) reduced
            to a single space and leading/trailing whitespace removed.
        """
        if text:
            normalized = text
            for source, target in self.replacements.items():
                normalized = normalized.replace(source, target)
            # split() with no argument drops all whitespace runs.
            tokens = normalized.split()
            return " ".join(tokens)
        return ""

# Shared OCRProcessor, created on first use. Constructing one builds a new
# EasyOCR reader, so it must not be repeated for every request.
_OCR_PROCESSOR = None


def _get_ocr_processor():
    """Return the shared OCRProcessor, creating it on the first call."""
    global _OCR_PROCESSOR
    if _OCR_PROCESSOR is None:
        _OCR_PROCESSOR = OCRProcessor()
    return _OCR_PROCESSOR


def full_processing(image: np.ndarray) -> Tuple[str, str]:
    """Run OCR on *image* and return the normalised text for both outputs.

    Args:
        image: Image to process, or ``None`` when no image is available.

    Returns:
        ``(cleaned_text, cleaned_text)`` on success. On failure, or when
        *image* is ``None``, ``(error_message, "")`` where the message
        starts with ``"خطا: "``. Exceptions are never propagated.
    """
    # Nothing to process: report it without constructing the OCR reader.
    if image is None:
        return "خطا: تصویری برای پردازش انتخاب نشده است", ""
    try:
        ocr_text = _get_ocr_processor().extract_text(image)
        cleaned_text = TextPostProcessor().preprocess(ocr_text)
        # No real rewriting is performed; the text is only cleaned, so both
        # outputs carry the same cleaned text.
        return cleaned_text, cleaned_text
    except Exception as e:
        # Boundary for the UI callback: surface the failure as text.
        return f"خطا: {e}", ""

# Build the Gradio UI: one row with two columns. The first column holds the
# image input and the process button; the second holds three tabs (extracted
# text, final text, image preview).
with gr.Blocks(title="پایپلاین OCR و تصحیح متن فارسی") as app:
    # Page header rendered as Markdown (runtime text shown to the user).
    gr.Markdown("""
    # استخراج و تصحیح متن فارسی از تصویر
    فقط متن را تصحیح (نرمال‌سازی) می‌کند و بازنویسی واقعی انجام نمی‌دهد.
    """)
    with gr.Row():
        with gr.Column():
            # Input image is delivered to callbacks as a numpy array.
            img_input = gr.Image(label="تصویر ورودی", type="numpy")
            process_btn = gr.Button("پردازش تصویر", variant="primary")
        with gr.Column():
            # First value returned by full_processing.
            with gr.Tab("متن استخراج شده"):
                raw_output = gr.Textbox(label="متن استخراج شده")
            # Second value returned by full_processing.
            with gr.Tab("متن نهایی (تصحیح شده)"):
                enhanced_output = gr.Textbox(label="متن نهایی")
            # Read-only copy of the input image.
            with gr.Tab("پیش‌نمایش تصویر"):
                img_preview = gr.Image(label="", interactive=False)

    # Mirror the input image into the preview whenever the input changes.
    img_input.change(fn=lambda x: x, inputs=img_input, outputs=img_preview)
    # On click, run the pipeline; its two return values fill the two text
    # boxes in the order listed in `outputs`.
    process_btn.click(fn=full_processing, inputs=img_input, outputs=[raw_output, enhanced_output])

# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    app.launch()