Spaces:

OmidSakaki
/

DocQA_Agent

Sleeping

App Files Files Community

OmidSakaki committed on Jul 2

Commit

2711484

verified ·

1 Parent(s): 3209503

Update app.py

Browse files

Files changed (1) hide show

app.py +64 -11

app.py CHANGED Viewed

@@ -2,7 +2,11 @@ import gradio as gr
 import easyocr
 import numpy as np
 from transformers import pipeline
 class OCRProcessor:
     def __init__(self):
         self.reader = easyocr.Reader(['fa'])
@@ -14,6 +18,38 @@ class OCRProcessor:
         except Exception as e:
             return f"خطا در پردازش OCR: {str(e)}"
 class MultilingualQAModel:
     def __init__(self):
         self.qa_pipeline = pipeline(
@@ -34,20 +70,37 @@ class MultilingualQAModel:
         except Exception as e:
             return f"خطا در مدل پرسش و پاسخ: {str(e)}"
 ocr_processor = OCRProcessor()
-qa_model = MultilingualQAModel()
-def pipeline_fn(image, question):
     context = ocr_processor.extract_text(image)
-    answer = qa_model.answer_question(context, question)
-    return context, answer
-with gr.Blocks(title="استخراج متن و پاسخ به سوال از تصویر فارسی") as app:
     gr.Markdown("""
-    # سیستم هوشمند پرسش و پاسخ از روی تصویر فارسی
-    1. تصویر را بارگذاری کنید تا متن استخراج شود.
-    2. سوال خود را به فارسی تایپ کنید.
-    3. دکمه «پاسخ» را بزنید.
     """)
     with gr.Row():
         with gr.Column():
@@ -56,10 +109,10 @@ with gr.Blocks(title="استخراج متن و پاسخ به سوال از تص
             process_btn = gr.Button("پاسخ")
         with gr.Column():
             context_output = gr.Textbox(label="متن استخراج شده", lines=10, max_lines=None, interactive=False)
-            answer_output = gr.Textbox(label="پاسخ مدل", lines=3, max_lines=None, interactive=False)
     process_btn.click(
-        fn=pipeline_fn,
         inputs=[img_input, question_input],
         outputs=[context_output, answer_output]
     )

 import easyocr
 import numpy as np
 from transformers import pipeline
+from sentence_transformers import SentenceTransformer
+import faiss
+import torch
+# 1. OCR Processor
 class OCRProcessor:
     def __init__(self):
         self.reader = easyocr.Reader(['fa'])
         except Exception as e:
             return f"خطا در پردازش OCR: {str(e)}"
+# 2. Text Chunker
def text_chunker(text, chunk_size=250, overlap=50):
    """Split *text* into overlapping chunks of whitespace-separated words.

    Args:
        text: Text to split; words are obtained with ``str.split()``.
        chunk_size: Maximum number of words per chunk. Must be positive.
        overlap: Number of words shared by two consecutive chunks. Must be
            smaller than ``chunk_size`` so the window always moves forward.

    Returns:
        A list of chunk strings, each made of words joined by single spaces.
        The list is empty when *text* contains no words.

    Raises:
        ValueError: If ``chunk_size`` is not positive or
            ``overlap >= chunk_size`` (the window would never advance).
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    if overlap >= chunk_size:
        raise ValueError("overlap must be smaller than chunk_size")

    words = text.split()
    step = chunk_size - overlap
    chunks = []
    for start in range(0, len(words), step):
        chunks.append(" ".join(words[start:start + chunk_size]))
        # Once a window reaches the last word, any further window would only
        # repeat the tail of this chunk, so stop here.
        if start + chunk_size >= len(words):
            break
    return chunks
+# 3. Embedding Agent
class EmbeddingAgent:
    """Thin wrapper around a SentenceTransformer model used to embed text."""

    # Multilingual model used when the caller does not choose one.
    DEFAULT_MODEL = 'sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2'

    def __init__(self, model_name=DEFAULT_MODEL):
        """Load the sentence-embedding model.

        Args:
            model_name: Name or path handed to ``SentenceTransformer``.
                Defaults to the multilingual MiniLM model.
        """
        self.model = SentenceTransformer(model_name)

    def embed(self, texts):
        """Return the model's encoding of *texts*.

        Args:
            texts: The sentences to encode (callers in this file pass a list
                of strings).

        Returns:
            Whatever ``self.model.encode(texts)`` returns — presumably a 2-D
            numpy array with one row per input text; confirm against the
            sentence-transformers documentation.
        """
        return self.model.encode(texts)
+# 4. Retriever Agent (with FAISS)
class RetrieverAgent:
    """Nearest-neighbour lookup of text chunks through a FAISS ``IndexFlatL2``."""

    def __init__(self, embeddings, texts):
        """Index *embeddings* so that row ``i`` maps back to ``texts[i]``.

        Args:
            embeddings: 2-D array-like with one embedding vector per row.
            texts: Sequence of chunk strings, aligned with the rows.

        Raises:
            ValueError: If *embeddings* is empty, is not 2-D, or its row count
                differs from ``len(texts)``.
        """
        # FAISS's Python bindings expect C-contiguous float32 input.
        vectors = np.ascontiguousarray(embeddings, dtype=np.float32)
        if vectors.ndim != 2 or vectors.shape[0] == 0:
            raise ValueError("embeddings must be a non-empty 2-D array")
        if vectors.shape[0] != len(texts):
            raise ValueError("embeddings and texts must have the same length")

        self.texts = texts
        d = vectors.shape[1]
        self.index = faiss.IndexFlatL2(d)
        self.index.add(vectors)

    def retrieve(self, query_embedding, top_k=1):
        """Return up to *top_k* stored texts closest to *query_embedding*.

        Args:
            query_embedding: A single query vector, either 1-D or shaped
                ``(1, dim)``.
            top_k: Maximum number of texts to return.

        Returns:
            A list of texts in the order reported by the index search. It is
            shorter than *top_k* when fewer texts are stored, and empty when
            ``top_k <= 0``.
        """
        # Never ask for more neighbours than exist: FAISS pads missing results
        # with -1, which would silently select ``self.texts[-1]``.
        k = min(top_k, len(self.texts))
        if k <= 0:
            return []

        query = np.ascontiguousarray(query_embedding, dtype=np.float32)
        if query.ndim == 1:
            query = query.reshape(1, -1)

        _, indices = self.index.search(query, k)
        # Skip any remaining -1 padding as a second line of defence.
        return [self.texts[idx] for idx in indices[0] if idx >= 0]
+# 5. QA Agent (using multilingual QA model)
 class MultilingualQAModel:
     def __init__(self):
         self.qa_pipeline = pipeline(
         except Exception as e:
             return f"خطا در مدل پرسش و پاسخ: {str(e)}"
+# Full DocQA Pipeline
 ocr_processor = OCRProcessor()
+embedder_agent = EmbeddingAgent()
+qa_agent = MultilingualQAModel()
def docqa_pipeline(image, question):
    """Run OCR -> chunking -> retrieval -> QA for one image and one question.

    Args:
        image: The image handed to ``ocr_processor.extract_text``.
        question: The user's question as a string.

    Returns:
        A ``(context, answer_text)`` tuple. ``context`` is the OCR output
        (or the OCR error message). ``answer_text`` contains the retrieved
        chunk and the model's answer, or a "no answer" message when OCR
        failed, no text was extracted, or the question is empty.
    """
    no_answer = "پاسخی وجود ندارد"

    # 1. OCR. The OCR step reports failures as a string starting with "خطا".
    context = ocr_processor.extract_text(image)
    if context.startswith("خطا"):
        return context, no_answer

    # An empty question cannot be embedded or answered meaningfully.
    if not question or not question.strip():
        return context, no_answer

    # 2. Chunking. With no extracted words there is nothing to index, and
    # building the retriever from empty embeddings would fail.
    chunks = text_chunker(context)
    if not chunks:
        return context, no_answer

    # 3. Embedding (chunks + question)
    chunk_embeddings = embedder_agent.embed(chunks)
    question_embedding = embedder_agent.embed([question])

    # 4. Retriever: find the most relevant chunk
    retriever = RetrieverAgent(chunk_embeddings, chunks)
    relevant_chunk = retriever.retrieve(question_embedding, top_k=1)[0]

    # 5. QA: answer the question from the retrieved chunk
    answer = qa_agent.answer_question(relevant_chunk, question)
    return context, f"متن مرتبط:\n{relevant_chunk}\n\nپاسخ مدل:\n{answer}"
+with gr.Blocks(title="DocQA Agent: پرسش و پاسخ هوشمند از سند فارسی استخراج‌شده از تصویر") as app:
     gr.Markdown("""
+    # DocQA Agent
+    <br>
+    یک سامانه چندعاملی برای پرسش و پاسخ از اسناد فارسی (OCR + جستجو + پاسخ هوشمند)
     """)
     with gr.Row():
         with gr.Column():
             process_btn = gr.Button("پاسخ")
         with gr.Column():
             context_output = gr.Textbox(label="متن استخراج شده", lines=10, max_lines=None, interactive=False)
+            answer_output = gr.Textbox(label="جواب مدل (بخش مرتبط و پاسخ)", lines=10, max_lines=None, interactive=False)
     process_btn.click(
+        fn=docqa_pipeline,
         inputs=[img_input, question_input],
         outputs=[context_output, answer_output]
     )