# DocQA_Agent / app.py
import gradio as gr
import easyocr
import numpy as np
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.docstore.document import Document
from transformers import pipeline as hf_pipeline
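
# Note: all model weights (the easyocr English detector/recognizer, the
# MiniLM sentence-embedding model, and the RoBERTa QA model) are downloaded
# on first run, so initial startup can take a while.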

# 1. OCR Processor (English)
class OCRProcessor:
    def __init__(self):
        self.reader = easyocr.Reader(['en'])

    def extract_text(self, image: np.ndarray) -> str:
        try:
            # detail=0 returns bare strings; paragraph=True merges nearby lines.
            results = self.reader.readtext(image, detail=0, paragraph=True)
            return "\n".join(results) if results else ""
        except Exception as e:
            return f"OCR error: {str(e)}"

# 2. LangChain-based DocQA Agent
class LangChainDocQAAgent:
    def __init__(self):
        self.embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
        self.text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
        # Extractive QA: the model returns a span copied from the given context.
        self.qa_pipeline = hf_pipeline(
            "question-answering",
            model="deepset/roberta-base-squad2",
            tokenizer="deepset/roberta-base-squad2",
        )

    def prepare_retriever(self, text):
        # Split the OCR text into overlapping chunks and index them in FAISS.
        docs = [Document(page_content=chunk) for chunk in self.text_splitter.split_text(text)]
        vectorstore = FAISS.from_documents(docs, self.embeddings)
        return vectorstore.as_retriever(), docs

    def qa(self, text, question):
        if not text.strip() or not question.strip():
            return "No text or question provided.", ""
        retriever, _ = self.prepare_retriever(text)
        relevant_docs = retriever.get_relevant_documents(question)
        # Keep only the top-ranked chunk as context for the QA model.
        relevant_context = relevant_docs[0].page_content if relevant_docs else ""
        if relevant_context:
            result = self.qa_pipeline({"context": relevant_context, "question": question})
            answer = result["answer"]
        else:
            answer = "No answer found."
        return relevant_context, answer
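
# Usage sketch (toy strings, not from a real document): qa() rebuilds the
# FAISS index on every call, trading repeated embedding work for statelessness.
#
#   agent = LangChainDocQAAgent()
#   chunk, answer = agent.qa("Alice Smith wrote this report in 2021.",
#                            "Who wrote the report?")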
ocr_processor = OCRProcessor()
docqa_agent = LangChainDocQAAgent()
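# The two singletons above are created at import time so that model weights
# are loaded once and shared across all Gradio requests.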

def docqa_pipeline(image, question):
    # 1. OCR the uploaded image.
    context = ocr_processor.extract_text(image)
    if context.startswith("OCR error"):
        return context, "No answer."
    # 2. Retrieve the most relevant chunk and extract an answer from it.
    relevant_chunk, answer = docqa_agent.qa(context, question)
    return context, f"Relevant chunk:\n{relevant_chunk}\n\nModel answer:\n{answer}"
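
# End-to-end sketch bypassing the UI (hypothetical image path):
#
#   import cv2
#   extracted, model_out = docqa_pipeline(cv2.imread("invoice.png"),
#                                         "What is the total amount?")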

with gr.Blocks(title="DocQA Agent (LangChain): Intelligent Q&A from Extracted English Documents") as app:
    gr.Markdown("""
    # omidsakaki.ir
    <br>
    A multi-agent system for question answering over English documents (OCR + retrieval + answer extraction with LangChain).
    """)
    with gr.Row():
        with gr.Column():
            img_input = gr.Image(label="Input Image", type="numpy")
            question_input = gr.Textbox(label="Your question (in English)", placeholder="e.g. Who is the author of this text?", lines=1)
            process_btn = gr.Button("Get Answer")
        with gr.Column():
            context_output = gr.Textbox(label="Extracted Text", lines=10, max_lines=None, interactive=False)
            answer_output = gr.Textbox(label="Model Output (Relevant Chunk & Answer)", lines=10, max_lines=None, interactive=False)
    process_btn.click(
        fn=docqa_pipeline,
        inputs=[img_input, question_input],
        outputs=[context_output, answer_output],
    )

if __name__ == "__main__":
    app.launch()
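
# Deployment sketch: launch() with no arguments is sufficient on a Hugging
# Face Space; to expose the app on a local network one could instead call,
# e.g., app.launch(server_name="0.0.0.0", server_port=7860), both standard
# gradio launch parameters.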