Spaces:

OmidSakaki
/

DocQA_Agent

Sleeping

App Files Files Community

DocQA_Agent / app.py

OmidSakaki

Update app.py

d3485e2 verified about 2 months ago

raw

history blame

2.19 kB

	import gradio as gr
	from paddleocr import PaddleOCR
	from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
	from PIL import Image
	import os

	# --- Models ---
	# Both models are created once at import time and reused by every request.
	_GRAMMAR_MODEL_ID = "persiannlp/mt5-small-parsinlu-grammar-correction"

	# OCR engine configured for Persian ('fa') with text-line orientation enabled.
	ocr_model = PaddleOCR(use_textline_orientation=True, lang='fa')
	# Tokenizer and seq2seq model used to post-process the OCR output.
	tokenizer = AutoTokenizer.from_pretrained(_GRAMMAR_MODEL_ID)
	nlp_model = AutoModelForSeq2SeqLM.from_pretrained(_GRAMMAR_MODEL_ID)

	# --- Processing functions ---
	def run_ocr(image):
	    """Extract the text from an image file and delete that file afterwards.

	    Args:
	        image: Path of the image file (``str`` or ``os.PathLike``; this is
	            what ``gr.Image(type="filepath")`` passes), an object exposing
	            the path as ``.name``, or ``None`` when nothing was uploaded.

	    Returns:
	        The recognized text lines joined by single spaces, or ``""`` when
	        no image was given or no text was found.
	    """
	    if image is None:
	        return ""

	    # A plain path has no ``.name`` attribute, so only fall back to
	    # ``.name`` for file-like wrapper objects.
	    if isinstance(image, (str, os.PathLike)):
	        image_path = os.fspath(image)
	    else:
	        image_path = image.name  # path of the temporary file

	    try:
	        # NOTE(review): ``cls=True`` and the ``line[1][0]`` result layout
	        # look like the PaddleOCR 2.x API, while the model is built with
	        # ``use_textline_orientation`` - confirm against the installed version.
	        result = ocr_model.ocr(image_path, cls=True)
	    finally:
	        # Delete the temporary file even when OCR fails.
	        os.remove(image_path)

	    # Guard against a missing or empty first entry (presumably returned
	    # when no text is detected - TODO confirm) instead of iterating None.
	    lines = result[0] if result else None
	    if not lines:
	        return ""
	    return " ".join(line[1][0] for line in lines)

	def postprocess_text(text, max_length=512):
	    """Run text through the seq2seq language model and return its output.

	    Args:
	        text: Raw text (for example OCR output) to feed to the model.
	        max_length: Token limit applied both to the truncated input and
	            to the generated output.

	    Returns:
	        The decoded model output without special tokens, or ``""`` when
	        ``text`` is empty or contains only whitespace.
	    """
	    # Nothing to correct; skip the model call entirely.
	    if not text or not text.strip():
	        return ""

	    inputs = tokenizer(
	        text, return_tensors="pt", max_length=max_length, truncation=True
	    )
	    # Without an explicit limit, generation stops at the library default
	    # length (20 tokens unless the model's generation config overrides
	    # it - confirm), which cuts longer texts short.
	    outputs = nlp_model.generate(**inputs, max_length=max_length)
	    return tokenizer.decode(outputs[0], skip_special_tokens=True)

	def process_image(image):
	    """Run OCR on an image and post-process the recognized text.

	    Returns:
	        A ``(raw_text, processed_text)`` pair. When OCR yields no text the
	        second element is a fixed "no text found" message instead of
	        model output.
	    """
	    ocr_text = run_ocr(image)
	    if not ocr_text:
	        # Nothing recognized: skip the language model and report it.
	        return ocr_text, "متنی یافت نشد!"
	    return ocr_text, postprocess_text(ocr_text)

	# --- Gradio interface ---
	# Builds the page: a title and description, an image input, two text
	# outputs, a submit button and a short usage guide.
	with gr.Blocks(title="OCR فارسی با پردازش NLP") as app:
	gr.Markdown("## 🔠 OCR فارسی + پردازش متن با مدل زبانی")
	gr.Markdown("متن را از تصاویر استخراج کنید و با مدل زبانی اصلاح کنید!")

	with gr.Row():
	# type="filepath": the callback receives the image as a file path.
	image_input = gr.Image(type="filepath", label="تصویر ورودی")
	with gr.Column():
	# One box for the raw OCR text, one for the language-model output.
	raw_text_output = gr.Textbox(label="متن خام (OCR)")
	processed_text_output = gr.Textbox(label="متن پردازش‌شده (NLP)")

	submit_btn = gr.Button("پردازش تصویر")
	# Clicking the button calls process_image with the image input and writes
	# its two return values into the two text boxes, in order.
	submit_btn.click(
	fn=process_image,
	inputs=image_input,
	outputs=[raw_text_output, processed_text_output]
	)

	# Horizontal rule followed by the usage guide.
	gr.Markdown("---")
	gr.Markdown("### راهنما:\n1. تصویری حاوی متن فارسی آپلود کنید.\n2. روی دکمه پردازش کلیک کنید.")

	# Run the application: launch the Gradio app only when this file is
	# executed as a script, not when it is imported.
	if __name__ == "__main__":
	app.launch()