# app.py
import os
import shutil
from pathlib import Path

import gradio as gr
from huggingface_hub import InferenceClient
# LangChain community & core imports
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_community.llms import HuggingFaceEndpoint
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from unstructured.partition.pdf import partition_pdf
from unstructured.partition.utils.constants import PartitionStrategy
# ----- Config & Folders -----
PDF_DIR = Path("pdfs")
FIG_DIR = Path("figures")
PDF_DIR.mkdir(exist_ok=True)
FIG_DIR.mkdir(exist_ok=True)
# ----- Read your HF_TOKEN secret -----
hf_token = os.environ["HF_TOKEN"]
# ----- Embeddings & LLM Setup -----
embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)
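# all-MiniLM-L6-v2 encodes each chunk as a 384-dimensional vector; FAISS
# indexes these vectors for similarity search at query time.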
# LLM via HF Inference API endpoint
llm = HuggingFaceEndpoint(
    endpoint_url="https://api-inference.huggingface.co/models/google/flan-t5-base",
    huggingfacehub_api_token=hf_token,
    task="text2text-generation",  # flan-t5 is a text2text (seq2seq) model
    temperature=0.5,
    max_new_tokens=512,  # HuggingFaceEndpoint expects max_new_tokens, not max_length
)
# Prompt
TEMPLATE = """
Use the following context to answer the question. If unknown, say so.
Context: {context}
Question: {question}
Answer (up to 3 sentences):
"""
prompt = PromptTemplate(template=TEMPLATE, input_variables=["context", "question"])
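# At query time RetrievalQA fills {context} with the retrieved chunks and
# {question} with the user's query before calling the LLM.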
# Inference client for image captioning (InferenceClient takes `model`, not `repo_id`)
vision_client = InferenceClient(
    model="Salesforce/blip-image-captioning-base",
    token=hf_token,
)
# Globals (populated once a PDF has been processed)
vector_store = None
qa_chain = None
def extract_image_caption(path: str) -> str:
    # InferenceClient.image_to_text accepts a local file path, raw bytes, or a URL
    result = vision_client.image_to_text(path)
    # newer huggingface_hub releases return an ImageToTextOutput object instead of a str
    return getattr(result, "generated_text", result)
def process_pdf(pdf_path: str) -> str:
    global vector_store, qa_chain
    # gr.File(type="filepath") passes the uploaded file's temp path as a string
    out_path = PDF_DIR / Path(pdf_path).name
    shutil.copy(pdf_path, out_path)
    elems = partition_pdf(
        str(out_path),
        strategy=PartitionStrategy.HI_RES,
        extract_image_block_types=["Image", "Table"],
        extract_image_block_output_dir=str(FIG_DIR),
    )
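    # partition_pdf wrote each detected figure/table to FIG_DIR as an image
    # file; their captions are generated below and indexed alongside the text.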
    texts = [el.text for el in elems if el.category not in ("Image", "Table")]
    for img_file in FIG_DIR.iterdir():
        texts.append(extract_image_caption(str(img_file)))
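    # 1000-character chunks with 200 characters of overlap, so content that
    # straddles a chunk boundary survives intact in at least one chunk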
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    docs = splitter.split_text("\n\n".join(texts))
    vector_store = FAISS.from_texts(docs, embedding_model)
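    # as_retriever() defaults to similarity search returning the top 4 chunks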
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        retriever=vector_store.as_retriever(),
        chain_type_kwargs={"prompt": prompt},
    )
return f"β
Processed `{pdf_file.name}` into {len(docs)} chunks."
def answer_query(question: str) -> str:
    if qa_chain is None:
        return "❌ Please upload and process a PDF first."
    return qa_chain.run(question)
# ----- Gradio UI -----
with gr.Blocks() as demo:
    gr.Markdown("## 📄📷 Multimodal RAG on Hugging Face Spaces")
    with gr.Row():
        pdf_in = gr.File(label="Upload PDF", type="filepath")
        btn_proc = gr.Button("Process PDF")
    status = gr.Textbox(label="Status")
    with gr.Row():
        q_in = gr.Textbox(label="Your Question")
        btn_ask = gr.Button("Ask")
    ans_out = gr.Textbox(label="Answer")
    btn_proc.click(fn=process_pdf, inputs=pdf_in, outputs=status)
    btn_ask.click(fn=answer_query, inputs=q_in, outputs=ans_out)
if __name__ == "__main__":
    demo.launch()