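"""DocQA Agent: extract English text from a document image with EasyOCR,
index it in a FAISS vectorstore, and answer questions with a LangChain
RetrievalQA chain over a Hugging Face text2text model."""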
import gradio as gr
import easyocr
import numpy as np
from langchain_community.llms import HuggingFacePipeline
from langchain.chains import RetrievalQA
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.docstore.document import Document
from transformers import pipeline as hf_pipeline

# 1. OCR Processor (English)
class OCRProcessor:
    def __init__(self):
        # EasyOCR downloads the English detection/recognition models on first use
        self.reader = easyocr.Reader(['en'])

    def extract_text(self, image: np.ndarray) -> str:
        try:
            # detail=0 returns plain strings; paragraph=True merges lines into blocks
            results = self.reader.readtext(image, detail=0, paragraph=True)
            return "\n".join(results) if results else ""
        except Exception as e:
            return f"OCR error: {str(e)}"

# 2. LangChain-based DocQA Agent
class LangChainDocQAAgent:
    def __init__(self):
        # Embedding model for semantic retrieval
        self.embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
        # Text splitter (chunk size and overlap chosen for better retrieval)
        self.text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
        # LLM used for answering. LangChain's HuggingFacePipeline only supports
        # generative tasks (text-generation, text2text-generation, summarization),
        # not extractive "question-answering" pipelines, so a text2text model is
        # wrapped here instead.
        self.qa_llm = HuggingFacePipeline(
            pipeline=hf_pipeline(
                "text2text-generation",
                model="google/flan-t5-base",
                tokenizer="google/flan-t5-base",
                max_new_tokens=128
            )
        )

    def prepare_retriever(self, text):
        # Split the text into LangChain Document objects
        docs = [Document(page_content=chunk) for chunk in self.text_splitter.split_text(text)]
        # Build a FAISS vectorstore over the chunks for retrieval
        vectorstore = FAISS.from_documents(docs, self.embeddings)
        return vectorstore.as_retriever(), docs
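
    # Note: the as_retriever() call in prepare_retriever() defaults to similarity
    # search returning the top 4 chunks; pass search_kwargs={"k": ...} to tune it.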

    def qa(self, text, question):
        if not text.strip() or not question.strip():
            return "No text or question provided.", ""
        # Build a retriever over the OCR text
        retriever, docs = self.prepare_retriever(text)
        # RetrievalQA chain: retrieve the relevant chunks and answer from them
        qa_chain = RetrievalQA.from_chain_type(
            llm=self.qa_llm,
            chain_type="stuff",
            retriever=retriever,
            return_source_documents=True
        )
        result = qa_chain.invoke({"query": question})
        answer = result["result"]
        # Show the most relevant chunk as supporting context
        relevant_context = result["source_documents"][0].page_content if result["source_documents"] else ""
        return relevant_context, answer
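
# Example round trip (illustrative only; the exact wording of the answer
# depends on the generative model):
#   agent = LangChainDocQAAgent()
#   chunk, answer = agent.qa("Alice Smith wrote this report in 2021.",
#                            "Who wrote the report?")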

ocr_processor = OCRProcessor()
docqa_agent = LangChainDocQAAgent()

def docqa_pipeline(image, question):
    # 1. OCR the uploaded image
    context = ocr_processor.extract_text(image)
    if context.startswith("OCR error"):
        return context, "No answer."
    # 2. LangChain RetrievalQA over the extracted text
    relevant_chunk, answer = docqa_agent.qa(context, question)
    return context, f"Relevant chunk:\n{relevant_chunk}\n\nModel answer:\n{answer}"

with gr.Blocks(title="DocQA Agent (LangChain): Intelligent Q&A over Extracted English Text") as app:
    gr.Markdown("""
# DocQA Agent (LangChain)
<br>
A multi-stage pipeline for question answering over English documents (OCR + retrieval + answer generation with LangChain).
""")
    with gr.Row():
        with gr.Column():
            img_input = gr.Image(label="Input Image", type="numpy")
            question_input = gr.Textbox(label="Your question (in English)", placeholder="e.g. Who is the author of this text?", lines=1)
            process_btn = gr.Button("Get Answer")
        with gr.Column():
            context_output = gr.Textbox(label="Extracted Text", lines=10, max_lines=None, interactive=False)
            answer_output = gr.Textbox(label="Model Output (Relevant Chunk & Answer)", lines=10, max_lines=None, interactive=False)

    process_btn.click(
        fn=docqa_pipeline,
        inputs=[img_input, question_input],
        outputs=[context_output, answer_output]
    )

if __name__ == "__main__":
    app.launch()
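
# Note: when running locally, app.launch(share=True) would additionally expose
# a temporary public URL through Gradio's tunneling service.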