Spaces:

OmidSakaki
/

DocQA_Agent

Sleeping

File size: 2,718 Bytes

9453eac
5c3f634
4abc449
3209503
9453eac
c5a772e
 
 
54a29b3
c5a772e
 
6ecc4f4
 
c5a772e
12a2f23
768d260
3209503
c5a772e
3209503
 
 
 
 
6ecc4f4
12a2f23
 
 
57fa964
adbe621
 
12a2f23
 
 
57fa964
12a2f23
57fa964
12a2f23
3209503
c5a772e
adbe621
12a2f23
 
 
 
 
c5a772e
12a2f23
 
 
 
c5a772e
9453eac
 
c5a772e
12a2f23
 
279ab91
12a2f23
 
28a9f71
57fa964
adbe621
12a2f23
 
57fa964
9453eac
 
4abc449

import gradio as gr
import easyocr
import numpy as np
from transformers import pipeline

class OCRProcessor:
    """Wrapper around an EasyOCR reader configured for Persian (``'fa'``)."""

    def __init__(self):
        # The reader is created once here and reused by every extract_text call.
        self.reader = easyocr.Reader(['fa'])

    def extract_text(self, image: np.ndarray) -> str:
        """Return the text recognised in *image*, one paragraph per line.

        Args:
            image: Image array forwarded to ``reader.readtext``. ``None``
                (no image supplied) is accepted and treated as "no text".

        Returns:
            The recognised paragraphs joined with newlines, or an empty
            string when the image is missing or nothing was recognised.
            If the OCR call raises, a Persian error message containing the
            exception text is returned instead of propagating the error.
        """
        # Without this guard a missing image makes readtext fail, and the
        # resulting error message would be returned as if it were OCR text.
        if image is None:
            return ""
        try:
            # detail=0 asks for plain strings; paragraph=True asks EasyOCR
            # to merge neighbouring boxes into paragraphs.
            paragraphs = self.reader.readtext(image, detail=0, paragraph=True)
            return "\n".join(paragraphs) if paragraphs else ""
        except Exception as e:
            # Deliberate best-effort: report the failure as text so the UI
            # can display it rather than crash.
            return f"خطا در پردازش OCR: {str(e)}"

class MultilingualQAModel:
    """Extractive question answering backed by a Hugging Face pipeline.

    NOTE(review): the default checkpoint, ``deepset/roberta-base-squad2``,
    appears to be an English SQuAD 2.0 model - confirm it is adequate for
    Persian input, or pass a multilingual checkpoint via ``model_name``.
    """

    def __init__(self, model_name: str = "deepset/roberta-base-squad2"):
        """Build the question-answering pipeline.

        Args:
            model_name: Identifier used for both the model and its
                tokenizer. Defaults to the checkpoint this class has
                always used, so existing callers are unaffected.
        """
        self.qa_pipeline = pipeline(
            "question-answering",
            model=model_name,
            tokenizer=model_name,
        )

    def answer_question(self, context: str, question: str) -> str:
        """Answer *question* using only the text in *context*.

        Args:
            context: Passage to search for the answer.
            question: Question to answer.

        Returns:
            The stripped answer span. A fixed Persian message is returned
            when either input is missing/blank, when the model yields no
            usable answer, or (with the exception text appended) when the
            pipeline raises.
        """
        # ``None`` is treated like an empty string; calling .strip() on it
        # would otherwise raise outside the try block below.
        if not (context and context.strip()) or not (question and question.strip()):
            return "متن یا سوال وارد نشده است."
        try:
            # Keyword arguments are the documented calling convention of
            # the question-answering pipeline.
            result = self.qa_pipeline(question=question, context=context)
            answer = result.get('answer', '').strip()
            # An empty span or a bare special token is not a real answer.
            if not answer or answer in ('[CLS]', '[SEP]', '[PAD]'):
                return "جوابی یافت نشد."
            return answer
        except Exception as e:
            # Deliberate best-effort: surface the failure as text for the UI.
            return f"خطا در مدل پرسش و پاسخ: {str(e)}"

# Module-level singletons: both helpers are constructed once when this module
# is loaded and are then shared by every call to pipeline_fn.
ocr_processor = OCRProcessor()
qa_model = MultilingualQAModel()

def pipeline_fn(image, question):
    """Run OCR on *image*, then answer *question* from the recognised text.

    Returns:
        A ``(context, answer)`` tuple: the text produced by the OCR step and
        the QA model's reply based on that text.
    """
    extracted_text = ocr_processor.extract_text(image)
    return extracted_text, qa_model.answer_question(extracted_text, question)

# Build the Gradio UI: an instructions header, one row with two columns
# (inputs first, read-only outputs second), and a button that runs pipeline_fn.
with gr.Blocks(title="استخراج متن و پاسخ به سوال از تصویر فارسی") as app:
    gr.Markdown("""
    # سیستم هوشمند پرسش و پاسخ از روی تصویر فارسی
    1. تصویر را بارگذاری کنید تا متن استخراج شود.
    2. سوال خود را به فارسی تایپ کنید.
    3. دکمه «پاسخ» را بزنید.
    """)
    with gr.Row():
        # First column: what the user provides.
        with gr.Column():
            # type="numpy" requests the uploaded image as a NumPy array,
            # matching the parameter annotation of OCRProcessor.extract_text.
            img_input = gr.Image(label="تصویر ورودی", type="numpy")
            question_input = gr.Textbox(label="سوال شما به فارسی", placeholder="مثلاً: نویسنده این متن کیست؟", lines=1)
            process_btn = gr.Button("پاسخ")
        # Second column: results; interactive=False makes both boxes read-only.
        with gr.Column():
            context_output = gr.Textbox(label="متن استخراج شده", lines=10, max_lines=None, interactive=False)
            answer_output = gr.Textbox(label="پاسخ مدل", lines=3, max_lines=None, interactive=False)

    # Clicking the button passes (image, question) to pipeline_fn and writes
    # its (context, answer) return value into the two output boxes, in order.
    process_btn.click(
        fn=pipeline_fn,
        inputs=[img_input, question_input],
        outputs=[context_output, answer_output]
    )

# Launch the Gradio app only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    app.launch()