Spaces:

OmidSakaki
/

DocQA_Agent

Sleeping

App Files Files Community

DocQA_Agent / app.py

OmidSakaki

Update app.py

3b15416 verified about 2 months ago

raw

history blame

3.52 kB

	import gradio as gr
	import easyocr
	import numpy as np
	from transformers import AutoTokenizer, AutoModelForQuestionAnswering
	import torch

	class OCRProcessor:
	    """Wrapper around an EasyOCR reader created for the ``'fa'`` language."""

	    def __init__(self):
	        # Building the reader is the expensive step, so it is done once here.
	        self.reader = easyocr.Reader(['fa'])

	    def extract_text(self, image: np.ndarray) -> str:
	        """Run OCR on ``image`` and return the recognised text.

	        Args:
	            image: Image to read, as a NumPy array. ``None`` (nothing was
	                supplied) and zero-size arrays are accepted.

	        Returns:
	            The recognised pieces of text joined with newlines. An empty
	            string when ``image`` is ``None``/empty or nothing was
	            recognised. If the reader raises, the exception is not
	            propagated; a message starting with ``"خطا در پردازش OCR:"``
	            is returned instead.
	        """
	        # Without this guard a missing image reaches the reader, fails there,
	        # and the failure message is returned as if it were extracted text.
	        if image is None or getattr(image, "size", None) == 0:
	            return ""
	        try:
	            pieces = self.reader.readtext(image, detail=0, paragraph=True)
	            return "\n".join(pieces) if pieces else ""
	        except Exception as e:
	            # Deliberate best-effort boundary: callers expect a string back.
	            return f"خطا در پردازش OCR: {str(e)}"

	class PersianQAModel:
	    """Extractive question answering backed by ``OmidSakaki/fa_qa_nlp_model``."""

	    def __init__(self):
	        """Load the tokenizer and the question-answering model.

	        Raises:
	            RuntimeError: If either the tokenizer or the model fails to load.
	        """
	        model_name = "OmidSakaki/fa_qa_nlp_model"
	        try:
	            self.tokenizer = AutoTokenizer.from_pretrained(model_name)
	            self.model = AutoModelForQuestionAnswering.from_pretrained(model_name)
	        except Exception as e:
	            # Chain the original exception so the real cause stays in the traceback.
	            raise RuntimeError(f"خطا در بارگذاری مدل پرسش و پاسخ: {str(e)}") from e

	    def answer_question(self, context: str, question: str) -> str:
	        """Return the span of ``context`` the model selects as the answer.

	        Args:
	            context: Text to search for the answer.
	            question: Question to answer.

	        Returns:
	            The decoded answer span. A fixed Persian message is returned
	            instead when either input is ``None``/blank, when no answer span
	            is found, or when tokenisation/inference raises (the exception
	            text is appended in that last case).
	        """
	        # None-safe emptiness check: a missing value is treated like a blank one
	        # instead of raising AttributeError on ``.strip()``.
	        if not (context and context.strip()) or not (question and question.strip()):
	            return "متن یا سوال وارد نشده است."
	        try:
	            # Question and context are encoded as one pair, capped at 512 tokens.
	            inputs = self.tokenizer(
	                question, context, return_tensors='pt', truncation=True, max_length=512
	            )
	            # Inference only: no gradients are needed.
	            with torch.no_grad():
	                outputs = self.model(**inputs)
	            start = int(torch.argmax(outputs.start_logits))
	            end = int(torch.argmax(outputs.end_logits)) + 1  # slice end is exclusive
	            # If end <= start the slice is empty and falls through to "no answer".
	            span_ids = inputs["input_ids"][0, start:end].tolist()
	            answer = self.tokenizer.convert_tokens_to_string(
	                self.tokenizer.convert_ids_to_tokens(span_ids, skip_special_tokens=True)
	            )
	            # Strip leftover whitespace around the decoded span.
	            answer = answer.strip()
	            if not answer or answer in ('[CLS]', '[SEP]', '[PAD]'):
	                return "جوابی یافت نشد."
	            return answer
	        except Exception as e:
	            return f"خطا در مدل پرسش و پاسخ: {str(e)}"

	# Module-level singletons: both objects are built once at import time and
	# are the instances that pipeline() calls into.
	# PersianQAModel() raises RuntimeError if the tokenizer or model cannot load.
	ocr_processor = OCRProcessor()
	qa_model = PersianQAModel()

	def pipeline(image, question):
	    """Extract text from ``image`` with OCR, then answer ``question`` from it.

	    Args:
	        image: Image passed on to ``ocr_processor.extract_text``; ``None``
	            when no image was provided.
	        question: Question passed on to ``qa_model.answer_question``.

	    Returns:
	        A ``(context, answer)`` tuple: the OCR output and the model's answer.
	        When OCR reports a failure, ``context`` carries the failure message
	        and ``answer`` is an empty string.
	    """
	    # No image: skip OCR and use an empty context, which the QA model
	    # answers with its own "missing input" message.
	    context = "" if image is None else ocr_processor.extract_text(image)
	    # extract_text reports failures by returning a message with this prefix
	    # rather than raising. Show it, but do not feed it to the QA model as if
	    # it were document text.
	    if context.startswith("خطا در پردازش OCR:"):
	        return context, ""
	    # Answer the question from the extracted text.
	    answer = qa_model.answer_question(context, question)
	    return context, answer

	# UI definition: inputs and the trigger button in one column, the two
	# read-only result boxes in the other.
	with gr.Blocks(title="استخراج متن و پاسخ به سوال از تصویر فارسی") as app:
	    gr.Markdown("""
	# سیستم هوشمند پرسش و پاسخ از روی تصویر فارسی
	1. تصویر را بارگذاری کنید تا متن استخراج شود.
	2. سوال خود را به فارسی تایپ کنید.
	3. دکمه «پاسخ» را بزنید.
	""")
	    with gr.Row():
	        with gr.Column():
	            # The image is requested as a NumPy array.
	            img_input = gr.Image(
	                label="تصویر ورودی",
	                type="numpy",
	            )
	            question_input = gr.Textbox(
	                label="سوال شما به فارسی",
	                placeholder="مثلاً: نویسنده این متن کیست؟",
	                lines=1,
	            )
	            process_btn = gr.Button("پاسخ")
	        with gr.Column():
	            context_output = gr.Textbox(
	                label="متن استخراج شده",
	                lines=10,
	                max_lines=None,
	                interactive=False,
	            )
	            answer_output = gr.Textbox(
	                label="پاسخ مدل",
	                lines=3,
	                max_lines=None,
	                interactive=False,
	            )

	    # Clicking the button runs pipeline(image, question) and writes its
	    # (context, answer) result into the two output boxes.
	    process_btn.click(
	        fn=pipeline,
	        inputs=[img_input, question_input],
	        outputs=[context_output, answer_output],
	    )

	# Start the web app only when this file is executed as a script.
	if __name__ == "__main__":
	    app.launch()