Spaces:

OmidSakaki
/

DocQA_Agent

Sleeping

File size: 1,906 Bytes

9453eac
 
 
 
 
 
 
7a79275
279ab91
 
fc0e7b8
7a79275
 
 
fc0e7b8
7a79275
279ab91
9453eac
 
 
279ab91
9453eac
 
279ab91
9453eac
 
 
 
 
 
 
279ab91
 
 
9453eac
 
 
279ab91
 
 
 
 
 
9453eac
279ab91
 
9453eac
279ab91
9453eac

import gradio as gr
from paddleocr import PaddleOCR
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from PIL import Image
import os

# --- Models ---
try:
    # New checkpoint, tested and found to work.
    model_name = "m3hrdadfi/mt5-small-parsinlu-grammar-correction"

    # OCR engine configured for Persian ('fa'), followed by the seq2seq
    # tokenizer/model pair that post-processes the recognized text.
    ocr_model = PaddleOCR(lang='fa', use_textline_orientation=True)
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    nlp_model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
except Exception as load_error:
    # Any failure while loading is surfaced as a gr.Error carrying a Persian
    # message ("Error loading models: please report this error to the
    # developer. Error: ...") followed by the original exception text.
    load_failure_message = f"خطا در بارگذاری مدل‌ها: لطفاً این خطا را به توسعه دهنده گزارش دهید. خطا: {str(load_error)}"
    raise gr.Error(load_failure_message)

# --- Processing functions ---
def run_ocr(image):
    """Extract the text of an image using the module-level ``ocr_model``.

    Args:
        image: The image to read. Either a filesystem path (``str`` or
            ``os.PathLike``), which is what ``gr.Image(type="filepath")``
            hands to its callback, or a file-like object exposing the path
            as ``.name``. ``None`` (no image supplied) is accepted.

    Returns:
        The recognized text lines joined by single spaces, or an empty
        string when there is no image or no text was detected.

    The image file is not deleted here: it belongs to the caller (Gradio's
    upload cache when wired to the UI), and removing it would break any
    repeated call on the same path.
    """
    if image is None:
        return ""

    # Path-like inputs are used directly; pathlib objects also have a
    # ``.name`` (the basename only), so they must not go through the
    # file-object branch below.
    if isinstance(image, (str, os.PathLike)):
        image_path = os.fspath(image)
    else:
        image_path = getattr(image, "name", None)

    if not image_path:
        return ""

    result = ocr_model.ocr(image_path, cls=True)

    # The per-image entry can be None/empty when nothing is detected;
    # iterating it unguarded would raise TypeError.
    if not result or not result[0]:
        return ""

    # Each detected line is read as (box, (text, score)); keep only the text.
    return " ".join(line[1][0] for line in result[0])

def postprocess_text(text):
    """Run text through the module-level seq2seq model and return its output.

    Args:
        text: The raw text to post-process (typically OCR output). ``None``,
            empty and whitespace-only values are accepted.

    Returns:
        The decoded model output with special tokens stripped, or an empty
        string when ``text`` carries no content (the model is not invoked).
    """
    # Nothing to correct: skip the model rather than generating from an
    # empty prompt.
    if not text or not text.strip():
        return ""

    max_tokens = 512
    inputs = tokenizer(text, return_tensors="pt", max_length=max_tokens, truncation=True)
    # Without an explicit cap, generate() falls back to the library/checkpoint
    # default output length, which can be much shorter than the input and
    # silently truncate the result. Allow the output to be as long as the
    # (truncated) input.
    outputs = nlp_model.generate(**inputs, max_length=max_tokens)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# --- User interface ---
def _process_image(img):
    """Click handler: OCR the image once, then post-process that text.

    Args:
        img: The value supplied by the ``image_input`` component.

    Returns:
        A ``(raw_text, processed_text)`` tuple matching the two output
        textboxes. ``processed_text`` is empty when OCR produced no text.
    """
    # Run OCR a single time and reuse the result; calling run_ocr twice
    # doubles the work and depends on the file still existing afterwards.
    raw_text = run_ocr(img)
    processed_text = postprocess_text(raw_text) if raw_text else ""
    return raw_text, processed_text


with gr.Blocks() as app:
    # Page heading (Persian: "Persian OCR system with advanced text processing").
    gr.Markdown("## سیستم OCR فارسی با پردازش پیشرفته متن")

    with gr.Row():
        # Left column: image upload and the trigger button.
        with gr.Column():
            image_input = gr.Image(type="filepath", label="تصویر ورودی")
            process_btn = gr.Button("پردازش تصویر")

        # Right column: raw OCR text and the post-processed text.
        with gr.Column():
            raw_output = gr.Textbox(label="متن استخراج شده")
            processed_output = gr.Textbox(label="متن پردازش شده")

    process_btn.click(
        fn=_process_image,
        inputs=image_input,
        outputs=[raw_output, processed_output]
    )

# Launch the Gradio app only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    app.launch()