DocQA_Agent / app.py
OmidSakaki's picture
Update app.py
999a6b4 verified
raw
history blame
1.97 kB
import gradio as gr
from paddleocr import PaddleOCR
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from PIL import Image
import os
# --- Models ---
# Hugging Face checkpoint used by the text post-processing step.
model_name = "m3hrdadfi/mt5-small-finetuned-grammar-synthesis"

try:
    # These loads run once at import time; any failure aborts start-up.
    # NOTE(review): `use_textline_orientation` appears to be the PaddleOCR 3.x
    # spelling of the angle-classifier switch, while run_ocr() calls
    # `ocr(..., cls=True)` in the 2.x style -- confirm which major version is
    # actually installed.
    ocr_model = PaddleOCR(lang='fa', use_textline_orientation=True)
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    nlp_model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
except Exception as e:
    # Chain explicitly so the root cause is reported as the direct cause of
    # the user-facing error instead of an incidental "during handling" context.
    raise gr.Error(f"خطا در بارگذاری مدل‌ها: لطفاً این خطا را به توسعه دهنده گزارش دهید. خطا: {str(e)}") from e
# --- Processing functions ---
def run_ocr(image):
    """Run OCR on an image file and return the recognised text as one string.

    Args:
        image: Path of the image to read. ``gr.Image(type="filepath")``
            supplies a plain ``str``; an object exposing the path as
            ``.name`` is accepted as well. ``None`` or an empty value means
            nothing was uploaded.

    Returns:
        The recognised text lines joined by single spaces, or ``""`` when
        there is no image or no text was detected.

    Side effects:
        Removes the image file after the OCR call, whether it succeeded
        or raised.
    """
    if not image:
        # Button pressed without an upload: nothing to read, nothing to delete.
        return ""

    # A bare path string has no `.name`; fall back to the value itself.
    image_path = getattr(image, "name", image)

    try:
        # NOTE(review): `cls=True` is the PaddleOCR 2.x call style -- confirm
        # it matches the installed PaddleOCR version.
        result = ocr_model.ocr(image_path, cls=True)
    finally:
        # Cleanup is best-effort and must not mask the OCR outcome.
        try:
            os.remove(image_path)
        except OSError:
            pass

    # The entry for the image can be None when no text was found, so guard
    # both the outer list and its first element before iterating.
    page = result[0] if result else None
    if not page:
        return ""

    # Each detected line is indexed as line[1][0] to obtain its text.
    texts = [line[1][0] for line in page]
    return " ".join(texts)
def postprocess_text(text):
    """Pass OCR text through the seq2seq model and return the decoded output.

    Args:
        text: Raw text to post-process. Input beyond 512 tokens is truncated
            by the tokenizer before generation.

    Returns:
        The model's decoded output with special tokens stripped, or ``""``
        when ``text`` is empty or only whitespace.
    """
    if not text or not text.strip():
        # Nothing was recognised; skip the model rather than decode whatever
        # it would generate from an empty prompt.
        return ""

    inputs = tokenizer(text, return_tensors="pt", max_length=512, truncation=True)
    # Without an explicit limit, generation falls back to the library's short
    # default output length and truncates long corrections; mirror the
    # 512-token input cap instead.
    outputs = nlp_model.generate(**inputs, max_new_tokens=512)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
# --- User interface ---
with gr.Blocks() as app:
    gr.Markdown("## سیستم OCR فارسی با پردازش پیشرفته متن")

    with gr.Row():
        # Left column: image upload and the trigger button.
        with gr.Column():
            image_input = gr.Image(type="filepath", label="تصویر ورودی")
            process_btn = gr.Button("پردازش تصویر")
        # Right column: raw OCR text and its post-processed form.
        with gr.Column():
            raw_output = gr.Textbox(label="متن استخراج شده")
            processed_output = gr.Textbox(label="متن پردازش شده")

    def process_image(img):
        """Return ``(raw OCR text, post-processed text)`` for one image.

        OCR runs a single time; its output is reused for both values.
        """
        extracted_text = run_ocr(img)
        return extracted_text, postprocess_text(extracted_text)

    process_btn.click(
        fn=process_image,
        inputs=image_input,
        outputs=[raw_output, processed_output],
    )

if __name__ == "__main__":
    # Start the Gradio server only when executed as a script.
    app.launch()