Spaces:
Sleeping
Sleeping
File size: 3,617 Bytes
9453eac 5c3f634 4abc449 87a1c7f 9453eac fd5f89e c5a772e fd5f89e 54a29b3 c5a772e 6ecc4f4 c5a772e fd5f89e 768d260 87a1c7f c5a772e 87a1c7f 872e2d7 3209503 6ecc4f4 87a1c7f 872e2d7 87a1c7f 57fa964 12a2f23 87a1c7f c5a772e 2711484 12a2f23 fd5f89e 87a1c7f fd5f89e 12a2f23 87a1c7f c5a772e 258e6aa 2711484 87a1c7f c5a772e 9453eac fd5f89e 279ab91 fd5f89e 28a9f71 57fa964 2711484 12a2f23 57fa964 9453eac 4abc449 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 |
import gradio as gr
import easyocr
import numpy as np
from langchain_community.llms import HuggingFacePipeline
from langchain.chains import RetrievalQA
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.docstore.document import Document
from transformers import pipeline as hf_pipeline
# 1. OCR Processor (English)
class OCRProcessor:
    """Thin wrapper around an easyocr Reader configured for English."""

    def __init__(self):
        # Model weights are loaded once, at construction time.
        self.reader = easyocr.Reader(['en'])

    def extract_text(self, image: np.ndarray) -> str:
        """Run OCR on *image* and return the recognized text.

        Returns paragraphs joined by newlines, an empty string when
        nothing is recognized, or an "OCR error: ..." message (checked
        by the caller via startswith) when easyocr raises.
        """
        try:
            paragraphs = self.reader.readtext(image, detail=0, paragraph=True)
            return "\n".join(paragraphs) if paragraphs else ""
        except Exception as exc:
            return f"OCR error: {exc}"
# 2. LangChain-based DocQA Agent
class LangChainDocQAAgent:
    """Retrieval-augmented QA over raw text: FAISS retrieval + extractive HF QA model."""

    def __init__(self):
        # Sentence embeddings used to index the extracted text.
        self.embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
        # Overlapping chunks keep answers that straddle chunk boundaries retrievable.
        self.text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
        # Extractive QA head: the answer is a span copied from the retrieved chunk.
        self.qa_pipeline = hf_pipeline(
            "question-answering",
            model="deepset/roberta-base-squad2",
            tokenizer="deepset/roberta-base-squad2",
        )

    def prepare_retriever(self, text):
        """Split *text* into chunks, index them in FAISS, return (retriever, docs)."""
        chunks = self.text_splitter.split_text(text)
        documents = [Document(page_content=chunk) for chunk in chunks]
        store = FAISS.from_documents(documents, self.embeddings)
        return store.as_retriever(), documents

    def qa(self, text, question):
        """Answer *question* from *text*; returns (relevant_chunk, answer)."""
        if not text.strip() or not question.strip():
            return "No text or question provided.", ""
        retriever, _ = self.prepare_retriever(text)
        hits = retriever.get_relevant_documents(question)
        # Only the single best-matching chunk is handed to the QA model.
        context = hits[0].page_content if hits else ""
        if not context:
            return context, "No answer found."
        prediction = self.qa_pipeline({"context": context, "question": question})
        return context, prediction["answer"]
# Module-level singletons shared by every request; both load model weights
# at import time, so the first startup is slow but requests are not.
ocr_processor = OCRProcessor()
docqa_agent = LangChainDocQAAgent()
def docqa_pipeline(image, question):
    """End-to-end pipeline: OCR the image, then retrieval-QA over the text.

    Returns (extracted_text, formatted_answer). An OCR failure is surfaced
    as the context string together with a fixed "No answer." answer.
    """
    # Step 1: OCR
    extracted = ocr_processor.extract_text(image)
    # extract_text signals failure via its "OCR error" prefix rather than raising.
    if extracted.startswith("OCR error"):
        return extracted, "No answer."
    # Step 2: LangChain retrieval + extractive QA
    chunk, answer = docqa_agent.qa(extracted, question)
    return extracted, f"Relevant chunk:\n{chunk}\n\nModel answer:\n{answer}"
# Gradio UI: image + question in, extracted text + QA answer out.
with gr.Blocks(title="DocQA Agent (LangChain): Intelligent Q&A from Extracted English Document") as app:
    gr.Markdown("""
# omidsakaki.ir
<br>
A multi-agent system for question answering from English documents (OCR + retrieval + intelligent answer with LangChain)
""")
    with gr.Row():
        with gr.Column():
            # Left column: user inputs.
            img_input = gr.Image(label="Input Image", type="numpy")
            question_input = gr.Textbox(label="Your question (in English)", placeholder="e.g. Who is the author of this text?", lines=1)
            process_btn = gr.Button("Get Answer")
        with gr.Column():
            # Right column: read-only results.
            context_output = gr.Textbox(label="Extracted Text", lines=10, max_lines=None, interactive=False)
            answer_output = gr.Textbox(label="Model Output (Relevant Chunk & Answer)", lines=10, max_lines=None, interactive=False)
    # Wire the button to the OCR + QA pipeline.
    process_btn.click(
        fn=docqa_pipeline,
        inputs=[img_input, question_input],
        outputs=[context_output, answer_output]
    )

if __name__ == "__main__":
    app.launch()