Spaces:

OmidSakaki
/

DocQA_Agent

Sleeping

File size: 2,931 Bytes

9453eac
5c3f634
4abc449
c5a772e
f774dbf
9453eac
c5a772e
 
 
54a29b3
c5a772e
 
6ecc4f4
 
c5a772e
 
768d260
57fa964
c5a772e
f774dbf
 
 
 
 
 
6ecc4f4
57fa964
 
 
f774dbf
57fa964
f774dbf
 
 
 
 
 
 
 
 
57fa964
 
 
 
f774dbf
 
57fa964
 
 
 
c5a772e
 
57fa964
c5a772e
57fa964
 
 
 
 
c5a772e
57fa964
 
c5a772e
57fa964
c5a772e
f774dbf
c5a772e
28a9f71
9453eac
 
c5a772e
 
279ab91
77d9d02
 
28a9f71
57fa964
 
 
 
 
9453eac
 
4abc449

import gradio as gr
import easyocr
import numpy as np
from typing import Tuple
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

class OCRProcessor:
    def __init__(self):
        self.reader = easyocr.Reader(['fa'])

    def extract_text(self, image: np.ndarray) -> str:
        try:
            results = self.reader.readtext(image, detail=0, paragraph=True)
            return "\n".join(results) if results else ""
        except Exception as e:
            raise RuntimeError(f"خطا در پردازش OCR: {str(e)}")

class TextCorrector:
    def __init__(self):
        model_name = "persiannlp/mt5-small-parsinlu-arc-comqa-question"
        try:
            self.tokenizer = AutoTokenizer.from_pretrained(model_name)
            self.model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
        except Exception as e:
            raise RuntimeError(f"خطا در بارگذاری مدل زبانی: {str(e)}")

    def correct(self, text: str) -> str:
        if not text.strip():
            return text
            
        try:
            inputs = self.tokenizer(
                "اصلاح متن: " + text,
                return_tensors="pt",
                max_length=512,
                truncation=True
            )
            
            outputs = self.model.generate(
                **inputs,
                max_length=512,
                num_beams=5,
                early_stopping=True
            )
            
            return self.tokenizer.decode(outputs[0], skip_special_tokens=True)
        except Exception as e:
            print(f"خطا در تصحیح متن: {e}")
            return text

def full_processing(image: np.ndarray) -> Tuple[str, str]:
    try:
        # استخراج متن از تصویر
        ocr_text = OCRProcessor().extract_text(image)
        
        # تصحیح متن با مدل زبانی
        corrected_text = TextCorrector().correct(ocr_text)
        
        return ocr_text, corrected_text
    except Exception as e:
        error_msg = f"خطا: {str(e)}"
        return error_msg, error_msg

with gr.Blocks(title="پایپلاین OCR + تصحیح خودکار متن فارسی") as app:
    gr.Markdown("""
    # سیستم استخراج و تصحیح هوشمند متن فارسی
    """)

    with gr.Row():
        with gr.Column():
            img_input = gr.Image(label="تصویر ورودی", type="numpy")
            process_btn = gr.Button("پردازش تصویر", variant="primary")
        with gr.Column():
            raw_output = gr.Textbox(label="متن استخراج شده (خام)", lines=8, max_lines=None)
            corrected_output = gr.Textbox(label="متن تصحیح شده (هوشمند)", lines=15, max_lines=None)

    process_btn.click(
        fn=full_processing,
        inputs=img_input,
        outputs=[raw_output, corrected_output]
    )

if __name__ == "__main__":
    app.launch()