Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -22,11 +22,9 @@ from langchain_huggingface import HuggingFaceEmbeddings
|
|
22 |
|
23 |
|
24 |
|
25 |
-
|
26 |
-
# ── Globals ───────────────────────────────────────────────────────────────────
|
27 |
retriever = None # FAISS retriever for multimodal content
|
28 |
current_pdf_name = None # Name of the currently loaded PDF
|
29 |
-
combined_texts =
|
30 |
|
31 |
# ── Setup: directories ────────────────────────────────────────────────────────
|
32 |
FIGURES_DIR = "figures"
|
@@ -34,76 +32,89 @@ if os.path.exists(FIGURES_DIR):
|
|
34 |
shutil.rmtree(FIGURES_DIR)
|
35 |
os.makedirs(FIGURES_DIR, exist_ok=True)
|
36 |
|
37 |
-
# ββ
|
38 |
-
#
|
39 |
-
|
40 |
-
#
|
41 |
-
embeddings = HuggingFaceEmbeddings(model_name="google/gemma-3-27b-it")
|
42 |
-
# Image captioning (BLIP)
|
43 |
blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
|
44 |
-
blip_model
|
45 |
|
46 |
|
47 |
def generate_caption(image_path: str) -> str:
|
48 |
-
"""
|
49 |
-
|
50 |
-
"""
|
51 |
-
image = Image.open(image_path).convert('RGB')
|
52 |
inputs = blip_processor(image, return_tensors="pt")
|
53 |
out = blip_model.generate(**inputs)
|
54 |
-
|
55 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
56 |
|
57 |
|
58 |
def process_pdf(pdf_file) -> str:
|
59 |
"""
|
60 |
-
|
61 |
-
|
62 |
-
Returns status message.
|
63 |
"""
|
64 |
global current_pdf_name, retriever, combined_texts
|
|
|
65 |
if pdf_file is None:
|
66 |
return "β Please upload a PDF file."
|
67 |
|
68 |
-
# Save
|
69 |
pdf_path = pdf_file.name
|
70 |
current_pdf_name = os.path.basename(pdf_path)
|
71 |
|
72 |
-
# Extract
|
73 |
elements = partition_pdf(
|
74 |
filename=pdf_path,
|
75 |
strategy=PartitionStrategy.HI_RES,
|
76 |
extract_image_block_types=["Image", "Table"],
|
77 |
-
extract_image_block_output_dir=FIGURES_DIR
|
78 |
)
|
79 |
|
80 |
-
#
|
81 |
-
text_elements = [
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
|
|
|
|
93 |
combined_texts = text_elements + captions
|
94 |
|
95 |
-
#
|
96 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
97 |
retriever = index.as_retriever(search_kwargs={"k": 2})
|
98 |
|
99 |
-
|
100 |
-
|
101 |
|
102 |
|
103 |
def ask_question(question: str) -> str:
|
104 |
-
"""
|
105 |
-
Retrieves relevant chunks from the FAISS index and generates an answer via chat model.
|
106 |
-
"""
|
107 |
global retriever
|
108 |
if retriever is None:
|
109 |
return "β Please upload and process a PDF first."
|
@@ -119,56 +130,65 @@ def ask_question(question: str) -> str:
|
|
119 |
f"Question: {question}\n"
|
120 |
"Answer:"
|
121 |
)
|
122 |
-
|
123 |
-
|
124 |
messages=[{"role": "user", "content": prompt}],
|
125 |
max_tokens=128,
|
126 |
-
temperature=0.5
|
127 |
)
|
128 |
-
|
129 |
-
return answer
|
130 |
|
131 |
|
132 |
def clear_interface():
|
133 |
-
"""
|
134 |
global retriever, current_pdf_name, combined_texts
|
135 |
retriever = None
|
136 |
current_pdf_name = None
|
137 |
-
combined_texts =
|
138 |
-
shutil.rmtree(FIGURES_DIR)
|
139 |
os.makedirs(FIGURES_DIR, exist_ok=True)
|
140 |
return ""
|
141 |
|
|
|
142 |
# ── Gradio UI ────────────────────────────────────────────────────────────────
|
143 |
theme = gr.themes.Soft(primary_hue="indigo", secondary_hue="blue")
|
144 |
with gr.Blocks(theme=theme, css="""
|
145 |
.container { border-radius: 10px; padding: 15px; }
|
146 |
-
.pdf-active { border-left: 3px solid #6366f1;
|
147 |
-
|
148 |
-
|
|
|
|
|
|
|
|
|
149 |
""") as demo:
|
150 |
-
gr.Markdown("<div class='main-title'>DocQueryAI (
|
151 |
|
152 |
with gr.Row():
|
153 |
with gr.Column():
|
154 |
gr.Markdown("## π Document Input")
|
155 |
-
|
156 |
-
pdf_file = gr.File(file_types=[".pdf"], type="filepath")
|
157 |
process_btn = gr.Button("π€ Process Document", variant="primary")
|
158 |
-
status_box
|
159 |
|
160 |
with gr.Column():
|
161 |
gr.Markdown("## β Ask Questions")
|
162 |
-
question_input = gr.Textbox(lines=3,
|
163 |
-
|
164 |
-
|
|
|
165 |
|
166 |
clear_btn = gr.Button("ποΈ Clear All", variant="secondary")
|
167 |
-
gr.Markdown("<div class='footer'>Powered by
|
168 |
|
169 |
-
process_btn.click(fn=process_pdf,
|
170 |
-
|
171 |
-
|
|
|
|
|
|
|
|
|
|
|
172 |
|
173 |
if __name__ == "__main__":
|
174 |
-
demo.launch(debug=True, share=True)
|
|
|
22 |
|
23 |
|
24 |
|
|
|
|
|
25 |
retriever = None # FAISS retriever for multimodal content
|
26 |
current_pdf_name = None # Name of the currently loaded PDF
|
27 |
+
combined_texts: List[str] = [] # Combined text + image captions corpus
|
28 |
|
29 |
# ── Setup: directories ────────────────────────────────────────────────────────
|
30 |
FIGURES_DIR = "figures"
|
|
|
32 |
shutil.rmtree(FIGURES_DIR)
|
33 |
os.makedirs(FIGURES_DIR, exist_ok=True)
|
34 |
|
35 |
+
# ── Clients & Models ──────────────────────────────────────────────────────────
|
36 |
+
hf = InferenceClient() # will use HUGGINGFACEHUB_API_TOKEN from env
|
37 |
+
|
38 |
+
# BLIP captioner (small local model download)
|
|
|
|
|
39 |
blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
|
40 |
+
blip_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
|
41 |
|
42 |
|
43 |
def generate_caption(image_path: str) -> str:
    """Caption a local image file with the module-level BLIP model.

    Args:
        image_path: Path of an image file that PIL can open.

    Returns:
        The caption decoded from the first generated sequence, with special
        tokens stripped.
    """
    # Open through a context manager so the underlying file handle is
    # released as soon as the RGB copy exists, not whenever GC gets to it.
    with Image.open(image_path) as raw_image:
        image = raw_image.convert("RGB")
    inputs = blip_processor(image, return_tensors="pt")
    out = blip_model.generate(**inputs)
    return blip_processor.decode(out[0], skip_special_tokens=True)
|
49 |
+
|
50 |
+
|
51 |
+
def embed_texts(
    texts: List[str],
    model: str = "google/Gemma-Embeddings-v1.0",
) -> List[List[float]]:
    """Embed each text through the Hugging Face Inference API.

    Args:
        texts: Strings to embed. An empty list short-circuits without any
            network call.
        model: Hub id of the hosted embedding model to query.
            NOTE(review): the default is carried over unchanged from the
            previous implementation; confirm this id exists on the Hub and
            serves the feature-extraction task.

    Returns:
        One embedding (list of floats) per input text, in input order.
    """
    if not texts:
        return []

    vectors: List[List[float]] = []
    for text in texts:
        # ``InferenceClient`` exposes embeddings as ``feature_extraction``;
        # there is no ``embeddings`` method and the result is array-like,
        # not a dict with an "embeddings" key.
        embedding = hf.feature_extraction(text, model=model)
        # Assumes the model returns one flat sentence-level vector per text;
        # a token-level (2-D) result fails loudly here instead of silently
        # producing a malformed index. TODO confirm for the chosen model.
        vectors.append([float(value) for value in embedding])
    return vectors
|
61 |
|
62 |
|
63 |
def _embed_query(text: str) -> List[float]:
    """Embed a single query string the same way the corpus was embedded."""
    return embed_texts([text])[0]


def process_pdf(pdf_file) -> str:
    """Index an uploaded PDF and prepare the module-level retriever.

    Partitions the PDF with the HI_RES strategy, writes Image/Table blocks
    into FIGURES_DIR, captions the extracted image files, embeds text blocks
    plus captions via ``embed_texts`` and builds a FAISS index from them.

    Args:
        pdf_file: Either a filesystem path string or an object exposing the
            path as ``.name``. ``None`` means nothing was uploaded.

    Returns:
        A human-readable status message.

    Side effects:
        Rebinds the globals ``current_pdf_name``, ``combined_texts`` and
        ``retriever`` and replaces the contents of FIGURES_DIR.
    """
    global current_pdf_name, retriever, combined_texts

    if pdf_file is None:
        return "❌ Please upload a PDF file."

    # A file input configured with type="filepath" hands over a plain path
    # string, which has no ``.name``; keep supporting file-like objects too.
    pdf_path = pdf_file if isinstance(pdf_file, str) else pdf_file.name
    current_pdf_name = os.path.basename(pdf_path)

    # Start from an empty figures directory so images extracted from a
    # previously processed PDF are not captioned into this document's index.
    shutil.rmtree(FIGURES_DIR, ignore_errors=True)
    os.makedirs(FIGURES_DIR, exist_ok=True)

    # Extract blocks
    elements = partition_pdf(
        filename=pdf_path,
        strategy=PartitionStrategy.HI_RES,
        extract_image_block_types=["Image", "Table"],
        extract_image_block_output_dir=FIGURES_DIR,
    )

    # Split text vs. images
    text_elements = [
        el.text for el in elements
        if el.category not in ("Image", "Table") and el.text
    ]
    # Sorted so caption order does not depend on directory listing order.
    image_files = sorted(
        os.path.join(FIGURES_DIR, f)
        for f in os.listdir(FIGURES_DIR)
        if f.lower().endswith((".png", ".jpg", ".jpeg"))
    )

    # Caption images
    captions = [generate_caption(img) for img in image_files]

    # Combine
    combined_texts = text_elements + captions
    if not combined_texts:
        # Nothing to index: drop any retriever left over from an earlier
        # document instead of answering from stale content.
        retriever = None
        return f"❌ No text or images could be extracted from '{current_pdf_name}'."

    # Remote embeddings
    vectors = embed_texts(combined_texts)

    # Build FAISS. ``from_embeddings`` takes (text, vector) pairs plus the
    # embedder used for queries; it has no ``texts=`` / ``embeddings=``
    # keywords.
    # NOTE(review): a plain callable is passed as ``embedding`` because no
    # Embeddings wrapper for the remote model is visible here — confirm the
    # installed LangChain version still accepts a callable.
    index = FAISS.from_embeddings(
        text_embeddings=list(zip(combined_texts, vectors)),
        embedding=_embed_query,
    )
    retriever = index.as_retriever(search_kwargs={"k": 2})

    return (
        f"✅ Indexed '{current_pdf_name}' — "
        f"{len(text_elements)} text blocks + {len(captions)} image captions"
    )
|
114 |
|
115 |
|
116 |
def ask_question(question: str) -> str:
|
117 |
+
"""Retrieve top-k chunks from FAISS and call chat_completions endpoint."""
|
|
|
|
|
118 |
global retriever
|
119 |
if retriever is None:
|
120 |
return "β Please upload and process a PDF first."
|
|
|
130 |
f"Question: {question}\n"
|
131 |
"Answer:"
|
132 |
)
|
133 |
+
response = hf.chat_completion(
|
134 |
+
model="google/gemma-3-27b-it",
|
135 |
messages=[{"role": "user", "content": prompt}],
|
136 |
max_tokens=128,
|
137 |
+
temperature=0.5,
|
138 |
)
|
139 |
+
return response["choices"][0]["message"]["content"].strip()
|
|
|
140 |
|
141 |
|
142 |
def clear_interface():
    """Reset module state and clear extracted images.

    Drops the retriever, the current PDF name and the combined corpus, then
    recreates an empty FIGURES_DIR.

    Returns:
        A pair of empty strings, one per output component wired to the
        clear button (status box and answer box). Returning a single value
        for two outputs makes Gradio reject the callback result.
    """
    global retriever, current_pdf_name, combined_texts
    retriever = None
    current_pdf_name = None
    combined_texts = []
    # ignore_errors: the directory may already be gone; recreate it either way.
    shutil.rmtree(FIGURES_DIR, ignore_errors=True)
    os.makedirs(FIGURES_DIR, exist_ok=True)
    return "", ""
|
151 |
|
152 |
+
|
153 |
# ── Gradio UI ────────────────────────────────────────────────────────────────
|
154 |
theme = gr.themes.Soft(primary_hue="indigo", secondary_hue="blue")
|
155 |
with gr.Blocks(theme=theme, css="""
|
156 |
.container { border-radius: 10px; padding: 15px; }
|
157 |
+
.pdf-active { border-left: 3px solid #6366f1;
|
158 |
+
padding-left: 10px;
|
159 |
+
background-color: rgba(99,102,241,0.1); }
|
160 |
+
.footer { text-align: center; margin-top: 30px;
|
161 |
+
font-size: 0.8em; color: #666; }
|
162 |
+
.main-title { text-align: center; font-size: 64px;
|
163 |
+
font-weight: bold; margin-bottom: 20px; }
|
164 |
""") as demo:
|
165 |
+
gr.Markdown("<div class='main-title'>DocQueryAI (RemoteβRAG)</div>")
|
166 |
|
167 |
with gr.Row():
|
168 |
with gr.Column():
|
169 |
gr.Markdown("## π Document Input")
|
170 |
+
pdf_file = gr.File(label="Upload PDF", file_types=[".pdf"], type="filepath")
|
|
|
171 |
process_btn = gr.Button("π€ Process Document", variant="primary")
|
172 |
+
status_box = gr.Textbox(label="Status", interactive=False)
|
173 |
|
174 |
with gr.Column():
|
175 |
gr.Markdown("## β Ask Questions")
|
176 |
+
question_input = gr.Textbox(lines=3,
|
177 |
+
placeholder="Enter your question hereβ¦")
|
178 |
+
ask_btn = gr.Button("π Ask Question", variant="primary")
|
179 |
+
answer_output = gr.Textbox(label="Answer", lines=8, interactive=False)
|
180 |
|
181 |
clear_btn = gr.Button("ποΈ Clear All", variant="secondary")
|
182 |
+
gr.Markdown("<div class='footer'>Powered by HF Inference + BLIP + FAISS | Gradio</div>")
|
183 |
|
184 |
+
process_btn.click(fn=process_pdf,
|
185 |
+
inputs=[pdf_file],
|
186 |
+
outputs=[status_box])
|
187 |
+
ask_btn.click(fn=ask_question,
|
188 |
+
inputs=[question_input],
|
189 |
+
outputs=[answer_output])
|
190 |
+
clear_btn.click(fn=clear_interface,
|
191 |
+
outputs=[status_box, answer_output])
|
192 |
|
193 |
if __name__ == "__main__":
|
194 |
+
demo.launch(debug=True, share=True)
|