import gradio as gr
import easyocr
import numpy as np
from transformers import pipeline
from sentence_transformers import SentenceTransformer
import faiss
# 1. OCR Processor
class OCRProcessor:
    def __init__(self):
        # EasyOCR reader configured for Persian ('fa')
        self.reader = easyocr.Reader(['fa'])

    def extract_text(self, image: np.ndarray) -> str:
        try:
            # detail=0 returns plain strings; paragraph=True merges adjacent lines
            results = self.reader.readtext(image, detail=0, paragraph=True)
            return "\n".join(results) if results else ""
        except Exception as e:
            return f"OCR processing error: {str(e)}"
# 2. Text Chunker
def text_chunker(text, chunk_size=250, overlap=50):
    """Split text into overlapping word-level chunks via a sliding window."""
    words = text.split()
    chunks = []
    i = 0
    while i < len(words):
        chunk = " ".join(words[i:i + chunk_size])
        chunks.append(chunk)
        i += chunk_size - overlap  # advance, keeping `overlap` words of context
    return chunks
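
# A quick sanity check of the sliding window (illustrative only, not part of
# the app): with chunk_size=4 and overlap=1 the window advances 3 words per
# step, so
#   text_chunker("a b c d e f g h", chunk_size=4, overlap=1)
# yields ["a b c d", "d e f g", "g h"]. Each chunk repeats the tail of the
# previous one, which is what lets the retriever match phrases that would
# otherwise be cut at a chunk boundary.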
# 3. Embedding Agent
class EmbeddingAgent:
    def __init__(self):
        # Multilingual sentence encoder; its training languages include Persian
        self.model = SentenceTransformer('sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2')

    def embed(self, texts):
        # Returns a float32 numpy array, one row per input text
        return self.model.encode(texts)
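
# Shape sketch (illustrative): MiniLM-L12-v2 produces 384-dimensional sentence
# vectors, so for n input strings `embed` returns an (n, 384) array, e.g.
#   EmbeddingAgent().embed(["hello world", "another sentence"]).shape  # (2, 384)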
# 4. Retriever Agent (with FAISS)
class RetrieverAgent:
    def __init__(self, embeddings, texts):
        self.texts = texts
        embeddings = np.asarray(embeddings, dtype="float32")  # FAISS expects float32
        d = embeddings.shape[1]
        self.index = faiss.IndexFlatL2(d)  # exact L2 (Euclidean) search
        self.index.add(embeddings)

    def retrieve(self, query_embedding, top_k=1):
        query_embedding = np.asarray(query_embedding, dtype="float32")
        D, I = self.index.search(query_embedding, top_k)
        return [self.texts[idx] for idx in I[0]]
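
# A minimal alternative sketch, not what the app above uses: L2 distance on
# unnormalized embeddings can be skewed by vector magnitude, and cosine
# similarity is a common substitute. One way is an inner-product index over
# L2-normalized vectors, where inner product equals cosine similarity:
#
#   emb = np.asarray(embeddings, dtype="float32")
#   faiss.normalize_L2(emb)                  # in-place row normalization
#   index = faiss.IndexFlatIP(emb.shape[1])  # inner product == cosine on unit vectors
#   index.add(emb)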
# 5. QA Agent (extractive question answering)
class MultilingualQAModel:
    def __init__(self):
        # NOTE: deepset/roberta-base-squad2 is trained on English SQuAD2. For
        # Persian questions a multilingual checkpoint (e.g.
        # deepset/xlm-roberta-base-squad2) may be a better fit.
        self.qa_pipeline = pipeline(
            "question-answering",
            model="deepset/roberta-base-squad2",
            tokenizer="deepset/roberta-base-squad2"
        )

    def answer_question(self, context: str, question: str) -> str:
        if not context.strip() or not question.strip():
            return "No text or question was provided."
        try:
            result = self.qa_pipeline({"context": context, "question": question})
            answer = result.get('answer', '').strip()
            # Filter out empty answers and stray special tokens
            if not answer or answer in ['[CLS]', '[SEP]', '[PAD]']:
                return "No answer was found."
            return answer
        except Exception as e:
            return f"QA model error: {str(e)}"
# Full DocQA Pipeline
ocr_processor = OCRProcessor()
embedder_agent = EmbeddingAgent()
qa_agent = MultilingualQAModel()

def docqa_pipeline(image, question):
    # 1. OCR
    context = ocr_processor.extract_text(image)
    if context.startswith("OCR processing error"):
        return context, "No answer available"
    # 2. Chunking
    chunks = text_chunker(context)
    # 3. Embedding (chunks + question)
    chunk_embeddings = embedder_agent.embed(chunks)
    question_embedding = embedder_agent.embed([question])
    # 4. Retriever: find the most relevant chunk
    retriever = RetrieverAgent(chunk_embeddings, chunks)
    relevant_chunk = retriever.retrieve(question_embedding, top_k=1)[0]
    # 5. QA: answer the question from the retrieved chunk
    answer = qa_agent.answer_question(relevant_chunk, question)
    return context, f"Relevant passage:\n{relevant_chunk}\n\nModel answer:\n{answer}"
with gr.Blocks(title="DocQA Agent: smart question answering over Persian documents extracted from images") as app:
    gr.Markdown("""
    # DocQA Agent
    <br>
    A multi-agent system for question answering over Persian documents (OCR + retrieval + smart answering)
    """)
    with gr.Row():
        with gr.Column():
            img_input = gr.Image(label="Input image", type="numpy")
            question_input = gr.Textbox(label="Your question in Persian", placeholder="e.g.: Who is the author of this text?", lines=1)
            process_btn = gr.Button("Answer")
        with gr.Column():
            context_output = gr.Textbox(label="Extracted text", lines=10, max_lines=None, interactive=False)
            answer_output = gr.Textbox(label="Model output (relevant passage and answer)", lines=10, max_lines=None, interactive=False)
    process_btn.click(
        fn=docqa_pipeline,
        inputs=[img_input, question_input],
        outputs=[context_output, answer_output]
    )

if __name__ == "__main__":
    app.launch()