Spaces:

OmidSakaki
/

DocQA_Agent

Sleeping

File size: 4,049 Bytes

9453eac
5c3f634
f5ea811
4abc449
c5a772e
9453eac
c5a772e
 
 
 
 
 
 
 
 
6ecc4f4
 
c5a772e
 
768d260
c5a772e
 
 
6ecc4f4
 
 
c5a772e
 
f5ea811
19169b4
f5ea811
 
 
19169b4
 
f5ea811
c5a772e
6ecc4f4
c5a772e
6ecc4f4
c5a772e
 
 
 
 
6ecc4f4
c5a772e
f5ea811
c5a772e
 
f5ea811
8f46e75
f5ea811
 
6ecc4f4
f5ea811
6ecc4f4
f5ea811
19169b4
f5ea811
c5a772e
2bf547d
6ecc4f4
c5a772e
 
 
 
 
 
 
 
 
 
 
6ecc4f4
c5a772e
6ecc4f4
 
c5a772e
9453eac
 
c5a772e
 
279ab91
6ecc4f4
c5a772e
6ecc4f4
 
a3b6ec8
 
c5a772e
6ecc4f4
 
9453eac
 
4abc449

import gradio as gr
import easyocr
from transformers import pipeline
import numpy as np
from typing import Tuple

## 1. Initial setup and models
# ----------------------------------
class OCRProcessor:
    """Wrapper around an EasyOCR reader configured for Persian ('fa')."""

    def __init__(self):
        # Building the reader loads the OCR models, so instances are worth
        # reusing rather than recreating per image.
        self.reader = easyocr.Reader(['fa'])

    def extract_text(self, image: np.ndarray) -> str:
        """Extract text from an image with EasyOCR.

        Args:
            image: The image to read, passed straight to ``Reader.readtext``.

        Returns:
            The recognised paragraphs joined with newlines, or an empty
            string when nothing was recognised.

        Raises:
            RuntimeError: If EasyOCR fails for any reason. The original
                exception is attached as ``__cause__``.
        """
        try:
            # detail=0 asks for plain strings only; paragraph=True lets
            # EasyOCR merge neighbouring lines into paragraphs.
            results = self.reader.readtext(image, detail=0, paragraph=True)
        except Exception as e:
            # Chain explicitly so the underlying OCR failure stays visible
            # in tracebacks instead of being only an implicit context.
            raise RuntimeError(f"خطا در پردازش OCR: {str(e)}") from e
        return "\n".join(results) if results else ""

class TextPostProcessor:
    """Normalise OCR output and optionally rewrite it with a language model."""

    def __init__(self):
        # Arabic letter forms -> Persian letter forms, and Persian digits ->
        # ASCII digits.
        self.replacements = {
            'ي': 'ی', 'ك': 'ک',
            '۰': '0', '۱': '1', '۲': '2', '۳': '3', '۴': '4',
            '۵': '5', '۶': '6', '۷': '7', '۸': '8', '۹': '9'
        }
        try:
            # The checkpoint name indicates a GPT-2 (decoder-only) model, so
            # it has to be loaded with the causal "text-generation" task.
            # "text2text-generation" only accepts encoder-decoder models and
            # would fail here, silently disabling rewriting.
            self.llm = pipeline(
                "text-generation",
                model="ParsiAI/gpt2-medium-fa-instruction",
                tokenizer="ParsiAI/gpt2-medium-fa-instruction"
            )
        except Exception as e:
            # Best effort: without the model the app still returns the
            # cleaned OCR text, so report the problem and carry on.
            print("خطا در بارگذاری مدل بازنویسی:", e)
            self.llm = None

    def preprocess(self, text: str) -> str:
        """Apply the character replacements and collapse all whitespace.

        Args:
            text: Raw OCR text; may be empty.

        Returns:
            The text with every key of ``self.replacements`` substituted and
            every run of whitespace (including newlines) reduced to a single
            space. An empty string for empty input.
        """
        if not text:
            return ""
        for old, new in self.replacements.items():
            text = text.replace(old, new)
        return " ".join(text.split())

    def enhance_with_llm(self, text: str) -> str:
        """Rewrite the text with the instruction-tuned model, if available.

        Args:
            text: Cleaned text to rewrite.

        Returns:
            The model's rewrite, or ``text`` unchanged when the input is
            empty, no model is loaded, generation fails, or the rewrite is
            shorter than 8 characters.
        """
        if not text or not self.llm:
            return text
        prompt = f"متن زیر را بازنویسی کن:\n{text}"
        try:
            result = self.llm(
                prompt,
                # Budget the generated continuation only; max_length would
                # also count the prompt tokens of a causal model.
                max_new_tokens=256,
                num_return_sequences=1,
                # A causal pipeline echoes the prompt unless told not to.
                return_full_text=False,
            )
            enhanced_text = result[0]['generated_text']
            # Defensive: drop the prompt if it was echoed back anyway.
            if enhanced_text.startswith(prompt):
                enhanced_text = enhanced_text[len(prompt):]
            enhanced_text = enhanced_text.strip()
            # If the rewrite is meaningless or too short, keep the input.
            if len(enhanced_text) < 8:
                return text
            return enhanced_text
        except Exception as e:
            print("خطا در بازنویسی:", e)
            return text

## 2. Main pipeline
# Processors are expensive to build (each one loads model weights), so they
# are created on first use and then shared by every request.
_PROCESSOR_CACHE = {}


def _get_processors():
    """Return the shared (OCRProcessor, TextPostProcessor) pair, building it once."""
    if "ocr" not in _PROCESSOR_CACHE:
        _PROCESSOR_CACHE["ocr"] = OCRProcessor()
    if "post" not in _PROCESSOR_CACHE:
        # NOTE: if the rewrite model failed to load, that instance (with
        # llm=None) is cached too, so loading is not retried per request.
        _PROCESSOR_CACHE["post"] = TextPostProcessor()
    return _PROCESSOR_CACHE["ocr"], _PROCESSOR_CACHE["post"]


def full_processing(image: np.ndarray) -> Tuple[str, str]:
    """Run OCR, normalisation and rewriting on one image.

    Args:
        image: The uploaded image, or ``None`` when nothing was uploaded.

    Returns:
        A ``(cleaned_text, enhanced_text)`` pair. On any failure the first
        element is an error message prefixed with "خطا: " and the second is
        an empty string; this function does not raise.
    """
    # Nothing to do without an image; answer before loading any model.
    if image is None:
        return "خطا: تصویری بارگذاری نشده است", ""
    try:
        ocr, post_processor = _get_processors()
        ocr_text = ocr.extract_text(image)
        cleaned_text = post_processor.preprocess(ocr_text)
        enhanced_text = post_processor.enhance_with_llm(cleaned_text)
        return cleaned_text, enhanced_text
    except Exception as e:
        # UI boundary: surface the error in the output box, not a traceback.
        return f"خطا: {str(e)}", ""

## 3. Gradio user interface
# Two-column layout: the left column holds the image input and the trigger
# button; the right column holds three tabs (extracted text, rewritten text,
# image preview).
with gr.Blocks(title="پایپلاین OCR و بازنویسی متن فارسی") as app:
    # Page heading and a one-line usage hint.
    gr.Markdown("""
    # سیستم استخراج و بازنویسی متن فارسی از تصویر
    تصویر را بارگذاری کنید، متن استخراج و سپس با مدل زبانی بازنویسی می‌شود.
    """)
    with gr.Row():
        with gr.Column():
            # type="numpy" matches the np.ndarray annotation on full_processing.
            img_input = gr.Image(label="تصویر ورودی", type="numpy")
            process_btn = gr.Button("پردازش تصویر", variant="primary")
        with gr.Column():
            with gr.Tab("متن استخراج شده"):
                raw_output = gr.Textbox(label="متن استخراج شده")
            with gr.Tab("متن بازنویسی شده"):
                enhanced_output = gr.Textbox(label="متن بازنویسی شده")
            with gr.Tab("پیش‌نمایش تصویر"):
                img_preview = gr.Image(label="", interactive=False)

    # Copy the input image into the read-only preview whenever it changes.
    img_input.change(fn=lambda x: x, inputs=img_input, outputs=img_preview)
    # The button runs the full pipeline; its two return values fill the
    # extracted-text and rewritten-text boxes, in that order.
    process_btn.click(fn=full_processing, inputs=img_input, outputs=[raw_output, enhanced_output])

# Start the web server only when the file is executed as a script.
if __name__ == "__main__":
    app.launch()