Spaces:

OmidSakaki
/

DocQA_Agent

Sleeping

App Files Files Community

OmidSakaki committed on Jul 3

Commit

87a1c7f

verified ·

1 Parent(s): fd5f89e

Update app.py

Browse files

Files changed (1) hide show

app.py +53 -72

app.py CHANGED Viewed

@@ -1,10 +1,15 @@
 import gradio as gr
 import easyocr
 import numpy as np
-from transformers import pipeline
-from sentence_transformers import SentenceTransformer
-import faiss
-import torch
 # 1. OCR Processor (English)
 class OCRProcessor:
@@ -18,89 +23,65 @@ class OCRProcessor:
         except Exception as e:
             return f"OCR error: {str(e)}"
-# 2. Text Chunker
def text_chunker(text, chunk_size=250, overlap=50):
    """Split *text* into overlapping chunks of whitespace-delimited words.

    Args:
        text: Source string; it is tokenised with ``str.split()``.
        chunk_size: Maximum number of words per chunk. Must be positive.
        overlap: Number of trailing words of one chunk that are repeated at
            the start of the next. Must satisfy ``0 <= overlap < chunk_size``.

    Returns:
        A list of chunk strings in document order. The list is empty when
        *text* contains no words.

    Raises:
        ValueError: If ``chunk_size`` is not positive, or ``overlap`` is
            outside ``[0, chunk_size)``.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    if not 0 <= overlap < chunk_size:
        # A step of zero or less never advances the window (endless loop);
        # a negative overlap would silently skip words between chunks.
        raise ValueError("overlap must satisfy 0 <= overlap < chunk_size")

    words = text.split()
    step = chunk_size - overlap
    chunks = []
    for start in range(0, len(words), step):
        end = start + chunk_size
        chunks.append(" ".join(words[start:end]))
        if end >= len(words):
            # This window already reaches the last word; one more step would
            # only re-emit words that sit inside the overlap region.
            break
    return chunks
-# 3. Embedding Agent (English)
class EmbeddingAgent:
    """Thin wrapper around the ``all-MiniLM-L6-v2`` sentence-transformer."""

    def __init__(self):
        """Load the sentence-transformer model and keep it on ``self.model``."""
        encoder = SentenceTransformer('all-MiniLM-L6-v2')
        self.model = encoder

    def embed(self, texts):
        """Return whatever ``self.model.encode`` produces for *texts*."""
        vectors = self.model.encode(texts)
        return vectors
-# 4. Retriever Agent (with FAISS)
class RetrieverAgent:
    """Nearest-neighbour lookup over text chunks using a flat L2 FAISS index.

    ``texts`` and the rows added to ``index`` are positionally aligned: the
    label FAISS returns for a hit is used directly as an index into ``texts``.
    """

    def __init__(self, embeddings, texts):
        """Build the index from one embedding row per text.

        Args:
            embeddings: 2-D array whose second dimension is the vector size
                (presumably float32, as FAISS expects - confirm at the caller).
            texts: Sequence of chunk strings, one per embedding row.
        """
        self.texts = texts
        dim = embeddings.shape[1]
        self.index = faiss.IndexFlatL2(dim)
        self.index.add(embeddings)

    def retrieve(self, query_embedding, top_k=1):
        """Return up to *top_k* texts closest to *query_embedding*.

        Args:
            query_embedding: Query batch passed straight to ``index.search``;
                only the first row of the result is used.
            top_k: Maximum number of neighbours to return.

        Returns:
            A list of texts ordered as returned by the index. It may be
            shorter than *top_k* when the index holds fewer vectors.
        """
        _, labels = self.index.search(query_embedding, top_k)
        # FAISS fills missing neighbours with label -1; used as a Python index
        # that would silently return the last text, so drop those slots.
        return [self.texts[idx] for idx in labels[0] if idx >= 0]
-# 5. QA Agent (English QA model)
class EnglishQAModel:
    """Extractive question answering with ``deepset/roberta-base-squad2``."""

    # Strings that are tokenizer artefacts rather than real answers. The
    # bracketed forms are BERT-style; the angle-bracket forms are the special
    # tokens of the RoBERTa tokenizer this model actually uses.
    _NON_ANSWERS = frozenset({'[CLS]', '[SEP]', '[PAD]', '<s>', '</s>', '<pad>'})

    def __init__(self):
        """Load the question-answering pipeline onto ``self.qa_pipeline``."""
        self.qa_pipeline = pipeline(
            "question-answering",
            model="deepset/roberta-base-squad2",
            tokenizer="deepset/roberta-base-squad2"
        )

    def answer_question(self, context: str, question: str) -> str:
        """Answer *question* from *context*.

        Args:
            context: Passage to extract the answer from.
            question: Question to answer.

        Returns:
            The stripped answer span, or one of the fixed messages
            ``"No text or question provided."``, ``"No answer found."`` or
            ``"QA error: ..."`` when the pipeline raises.
        """
        # Treat None the same as blank input instead of failing on .strip().
        if not context or not context.strip() or not question or not question.strip():
            return "No text or question provided."
        try:
            result = self.qa_pipeline({"context": context, "question": question})
            answer = result.get('answer', '').strip()
        except Exception as e:
            # Best-effort UI boundary: surface the failure as text.
            return f"QA error: {str(e)}"
        if not answer or answer in self._NON_ANSWERS:
            return "No answer found."
        return answer
-# Full DocQA Pipeline (English)
 ocr_processor = OCRProcessor()
-embedder_agent = EmbeddingAgent()
-qa_agent = EnglishQAModel()
 def docqa_pipeline(image, question):
     """Answer *question* from the text that OCR extracts from *image*.

     Returns:
         A ``(context, message)`` pair: the full OCR text and either the
         formatted "relevant chunk + model answer" report or a short status
         message when OCR failed or there is nothing to answer.
     """
     # 1. OCR
     context = ocr_processor.extract_text(image)
     if context.startswith("OCR error"):
         return context, "No answer."
     # With no words there are no chunks to index, and retrieval over an
     # empty index cannot succeed; stop here instead of crashing later.
     if not context.strip() or not question or not question.strip():
         return context, "No text or question provided."
     # 2. Chunking
     chunks = text_chunker(context)
     # 3. Embedding
     chunk_embeddings = embedder_agent.embed(chunks)
     question_embedding = embedder_agent.embed([question])
     # 4. Retrieval
     retriever = RetrieverAgent(chunk_embeddings, chunks)
     relevant_chunk = retriever.retrieve(question_embedding, top_k=1)[0]
     # 5. QA
     answer = qa_agent.answer_question(relevant_chunk, question)
     return context, f"Relevant chunk:\n{relevant_chunk}\n\nModel answer:\n{answer}"
-with gr.Blocks(title="DocQA Agent: Intelligent Q&A from Extracted English Document") as app:
     gr.Markdown("""
-    # DocQA Agent
     <br>
-    A multi-agent system for question answering from English documents (OCR + retrieval + intelligent answer)
     """)
     with gr.Row():
         with gr.Column():

 import gradio as gr
 import easyocr
 import numpy as np
+from langchain_community.llms import HuggingFacePipeline
+from langchain.chains import RetrievalQA
+from langchain_community.vectorstores import FAISS
+from langchain_community.embeddings import HuggingFaceEmbeddings
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.docstore.document import Document
+from transformers import pipeline as hf_pipeline
 # 1. OCR Processor (English)
 class OCRProcessor:
         except Exception as e:
             return f"OCR error: {str(e)}"
+# 2. LangChain-based DocQA Agent
class LangChainDocQAAgent:
    """Retrieval-backed extractive QA over a single block of text.

    The text is split into chunks, embedded into an in-memory FAISS store, and
    the chunks closest to the question are handed to an extractive
    question-answering model.
    """

    def __init__(self):
        """Load the embedding model, the text splitter and the QA pipeline."""
        # Embedding model
        self.embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
        # Text splitter (chunk size and overlap for better retrieval)
        self.text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
        # Extractive QA pipeline. It is called directly with question/context
        # because LangChain's HuggingFacePipeline LLM wrapper only drives
        # text-generation style tasks and cannot run "question-answering".
        self.qa_pipeline = hf_pipeline(
            "question-answering",
            model="deepset/roberta-base-squad2",
            tokenizer="deepset/roberta-base-squad2"
        )
        # Kept only so existing references to ``qa_llm`` still resolve; it is
        # not used for answering (see the note on ``qa_pipeline``).
        self.qa_llm = HuggingFacePipeline(
            pipeline=self.qa_pipeline,
            model_kwargs={"return_full_text": False}
        )

    def prepare_retriever(self, text):
        """Chunk *text* and index it.

        Returns:
            ``(retriever, docs)``: a retriever over a fresh FAISS store and
            the list of ``Document`` chunks that were indexed.
        """
        # Split text into LangChain Document objects
        docs = [Document(page_content=chunk) for chunk in self.text_splitter.split_text(text)]
        # Create FAISS vectorstore for retrieval
        vectorstore = FAISS.from_documents(docs, self.embeddings)
        return vectorstore.as_retriever(), docs

    def qa(self, text, question):
        """Answer *question* from *text*.

        Returns:
            ``(relevant_context, answer)``: the top retrieved chunk and the
            extracted answer. When input is missing or nothing is found,
            ``relevant_context`` is ``""`` and ``answer`` carries the message.
        """
        if not text or not text.strip() or not question or not question.strip():
            # The message belongs in the answer slot: callers print the first
            # element as the "relevant chunk".
            return "", "No text or question provided."
        # Build retriever from text
        retriever, _ = self.prepare_retriever(text)
        hits = retriever.invoke(question)
        if not hits:
            return "", "No answer found."
        # Give the reader every retrieved chunk, as a "stuff" chain would.
        context = "\n\n".join(doc.page_content for doc in hits)
        result = self.qa_pipeline(question=question, context=context)
        answer = result.get("answer", "").strip()
        # Show the most relevant chunk as context
        return hits[0].page_content, answer or "No answer found."
 ocr_processor = OCRProcessor()
+docqa_agent = LangChainDocQAAgent()
 def docqa_pipeline(image, question):
     """Run OCR on *image*, then answer *question* from the extracted text.

     Returns:
         A ``(extracted_text, report)`` pair. ``report`` is ``"No answer."``
         when OCR reported an error, otherwise the retrieved chunk followed
         by the model's answer.
     """
     # Step 1: OCR. Failures arrive as a message string, not an exception.
     extracted_text = ocr_processor.extract_text(image)
     ocr_failed = extracted_text.startswith("OCR error")
     if ocr_failed:
         return extracted_text, "No answer."
     # Step 2: retrieval + question answering via the LangChain agent.
     supporting_chunk, model_answer = docqa_agent.qa(extracted_text, question)
     report = f"Relevant chunk:\n{supporting_chunk}\n\nModel answer:\n{model_answer}"
     return extracted_text, report
+with gr.Blocks(title="DocQA Agent (LangChain): Intelligent Q&A from Extracted English Document") as app:
     gr.Markdown("""
+    # DocQA Agent (LangChain)
     <br>
+    A multi-agent system for question answering from English documents (OCR + retrieval + intelligent answer with LangChain)
     """)
     with gr.Row():
         with gr.Column():