Spaces:

OmidSakaki
/

DocQA_Agent

Sleeping

App Files Files Community

DocQA_Agent / app.py

OmidSakaki

Update app.py

7ea3e69 verified about 2 months ago

raw

history blame

2.93 kB

	import gradio as gr
	import easyocr
	import numpy as np
	from transformers import AutoTokenizer, AutoModelForQuestionAnswering, pipeline

	class OCRProcessor:
	    """Extract Persian text from images with EasyOCR.

	    The EasyOCR reader is created once in the constructor (language list
	    ``['fa']``) and reused by every call to :meth:`extract_text`.
	    """

	    def __init__(self):
	        self.reader = easyocr.Reader(['fa'])

	    def extract_text(self, image: np.ndarray) -> str:
	        """Run OCR on *image* and return the recognised text.

	        Args:
	            image: Image array handed to the reader. ``None`` (no image
	                supplied) is accepted and treated as "nothing to read".

	        Returns:
	            The recognised paragraphs joined with newlines; an empty string
	            when there is no image or the reader found no text; or a
	            Persian error message when the reader raises.
	        """
	        # Without an image there is nothing to read. Returning "" here avoids
	        # surfacing the reader's failure message as if it were OCR output.
	        if image is None:
	            return ""
	        try:
	            results = self.reader.readtext(image, detail=0, paragraph=True)
	            return "\n".join(results) if results else ""
	        except Exception as e:
	            # Best-effort boundary: the caller displays whatever string is
	            # returned, so failures are reported as text rather than raised.
	            return f"خطا در پردازش OCR: {e}"

	class PersianQAModel:
	    """Extractive question answering over Persian text.

	    Wraps a Hugging Face ``question-answering`` pipeline that is created once
	    in the constructor and reused by :meth:`answer_question`.
	    """

	    def __init__(self, model_name: str = "HooshvareLab/bert-fa-qa"):
	        """Create the question-answering pipeline.

	        Args:
	            model_name: Model identifier passed to ``pipeline`` as both the
	                model and the tokenizer.

	        Raises:
	            RuntimeError: If the pipeline cannot be created. The underlying
	                exception is chained as the cause.
	        """
	        try:
	            self.qa_pipeline = pipeline(
	                "question-answering",
	                model=model_name,
	                tokenizer=model_name,
	            )
	        except Exception as e:
	            raise RuntimeError(f"خطا در بارگذاری مدل پرسش و پاسخ: {e}") from e

	    def answer_question(self, context: str, question: str) -> str:
	        """Answer *question* using *context* as the source text.

	        Args:
	            context: Text to search for the answer.
	            question: Question to answer.

	        Returns:
	            The stripped answer span; or a Persian message when either input
	            is missing/blank, when the model yields no usable answer, or when
	            the pipeline raises.
	        """
	        # Treat None the same as a blank string instead of crashing on .strip().
	        if not (context and context.strip()) or not (question and question.strip()):
	            return "متن یا سوال وارد نشده است."
	        try:
	            result = self.qa_pipeline(question=question, context=context)
	            answer = (result.get('answer') or '').strip()
	            # A bare special token is not a real answer.
	            if not answer or answer in ('[CLS]', '[SEP]', '[PAD]'):
	                return "جوابی یافت نشد."
	            return answer
	        except Exception as e:
	            # Best-effort boundary: the caller displays whatever string is
	            # returned, so failures are reported as text rather than raised.
	            return f"خطا در مدل پرسش و پاسخ: {e}"

	# Shared instances, created once when the module is loaded. Their constructors
	# build the EasyOCR reader and the question-answering pipeline; pipeline_fn
	# reuses both objects on every call.
	ocr_processor = OCRProcessor()
	qa_model = PersianQAModel()

	def pipeline_fn(image, question):
	    """Run OCR on *image*, then answer *question* from the extracted text.

	    Args:
	        image: Image passed to the shared OCR processor.
	        question: Question passed to the shared QA model.

	    Returns:
	        A ``(extracted_text, answer)`` tuple.
	    """
	    extracted = ocr_processor.extract_text(image)
	    return extracted, qa_model.answer_question(extracted, question)

	# Gradio UI: an image input, a question box and a button. Clicking the button
	# runs pipeline_fn and fills the two output boxes.
	# NOTE(review): indentation is flattened in this copy of the file, so the
	# nesting of the `with` blocks below is not visible. Presumably it is
	# Blocks > Row > two Columns, with the click wiring inside Blocks; confirm
	# against the original file.
	with gr.Blocks(title="استخراج متن و پاسخ به سوال از تصویر فارسی") as app:
	# Heading and usage steps shown to the user (Persian text).
	gr.Markdown("""
	# سیستم هوشمند پرسش و پاسخ از روی تصویر فارسی
	1. تصویر را بارگذاری کنید تا متن استخراج شود.
	2. سوال خود را به فارسی تایپ کنید.
	3. دکمه «پاسخ» را بزنید.
	""")
	with gr.Row():
	# Inputs and the submit button; the image is requested as a numpy array.
	with gr.Column():
	img_input = gr.Image(label="تصویر ورودی", type="numpy")
	question_input = gr.Textbox(label="سوال شما به فارسی", placeholder="مثلاً: نویسنده این متن کیست؟", lines=1)
	process_btn = gr.Button("پاسخ")
	# Read-only outputs (interactive=False): extracted text and the answer.
	with gr.Column():
	context_output = gr.Textbox(label="متن استخراج شده", lines=10, max_lines=None, interactive=False)
	answer_output = gr.Textbox(label="پاسخ مدل", lines=3, max_lines=None, interactive=False)

	# Wire the button: (image, question) -> (extracted text, answer).
	process_btn.click(
	fn=pipeline_fn,
	inputs=[img_input, question_input],
	outputs=[context_output, answer_output]
	)

	# Launch the app only when this file is run as a script.
	if __name__ == "__main__":
	app.launch()