Spaces:
Sleeping
Sleeping
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,598 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import chromadb
|
3 |
+
import google.generativeai as genai
|
4 |
+
import os
|
5 |
+
from dotenv import load_dotenv
|
6 |
+
import logging
|
7 |
+
import functools
|
8 |
+
from collections import defaultdict
|
9 |
+
|
10 |
+
# --- Configuration ---
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# The Gemini API key is read from the environment after load_dotenv() has run.
load_dotenv()
API_KEY = os.getenv("GEMINI_API_KEY")
if API_KEY:
    try:
        genai.configure(api_key=API_KEY)
        logging.info("Gemini API configured successfully.")
    except Exception as e:
        # A configuration failure is logged only; the module keeps loading.
        logging.error(f"Error configuring Gemini API: {e}")
else:
    logging.error("GEMINI_API_KEY not found in environment variables.")

# Location and name of the persistent Chroma collection that is searched.
CHROMA_DB_PATH = "./chroma"
COLLECTION_NAME = "phil_de"

# Embedding model used for queries. It has to be the same model the stored
# vectors were created with (3072 dimensions are expected, based on past errors).
EMBEDDING_MODEL = "models/gemini-embedding-exp-03-07"
logging.info(f"Using embedding model: {EMBEDDING_MODEL}")

# --- Constants ---
# Upper bound on the number of hits requested per search.
MAX_RESULTS = 20
|
36 |
+
|
37 |
+
# --- ChromaDB Connection and Author Fetching ---
# Both names keep these fallbacks when the database cannot be opened; the rest
# of the module checks `collection is None` before touching the database.
collection = None
unique_authors = []
try:
    client = chromadb.PersistentClient(path=CHROMA_DB_PATH)
    collection = client.get_collection(name=COLLECTION_NAME)
    logging.info(f"Successfully connected to ChromaDB collection '{COLLECTION_NAME}'. Collection count: {collection.count()}")

    # Read every stored metadata record once to build the author filter choices.
    logging.info("Fetching all metadata to extract unique authors...")
    all_metadata = collection.get(include=['metadatas'])
    metadata_rows = all_metadata['metadatas'] if all_metadata and 'metadatas' in all_metadata else None
    if metadata_rows:
        # Records without metadata, or with an empty author, are ignored.
        unique_authors = sorted({
            row['author']
            for row in metadata_rows
            if row and 'author' in row and row['author']
        })
        logging.info(f"Found {len(unique_authors)} unique authors.")
    else:
        logging.warning("Could not retrieve metadata or no metadata found to extract authors.")

except Exception as e:
    logging.critical(f"FATAL: Could not connect to Chroma DB or fetch authors: {e}", exc_info=True)
    unique_authors = []
|
60 |
+
|
61 |
+
# --- Embedding Function ---
|
62 |
+
def get_embedding(text, task="RETRIEVAL_QUERY"):
    """
    Request a Gemini embedding for ``text``.

    Args:
        text: The text to embed. Empty text is rejected.
        task: Value passed through as ``task_type`` to ``genai.embed_content``.

    Returns:
        The ``'embedding'`` entry of the API response, or ``None`` when no API
        key is configured, ``text`` is empty, or the request fails. Failures
        are logged, never raised.
    """
    if not API_KEY:
        logging.error("Cannot generate embedding: API key not configured.")
        return None
    if not text:
        logging.warning("Embedding requested for empty text.")
        return None

    try:
        logging.info(f"Generating embedding for task: {task}")
        response = genai.embed_content(model=EMBEDDING_MODEL, content=text, task_type=task)
        logging.info("Embedding generated successfully.")
        return response['embedding']
    except Exception as e:
        logging.error(f"Error generating Gemini embedding: {e}", exc_info=True)
        # Add a more specific hint when the error text points at a known cause.
        detail = str(e).lower()
        model_problem = "model" in detail and ("not found" in detail or "permission" in detail)
        if model_problem:
            logging.error(f"The configured embedding model '{EMBEDDING_MODEL}' might be incorrect, unavailable, or lack permissions.")
        elif "dimension" in detail:
            logging.error(f"Potential dimension mismatch issue with model '{EMBEDDING_MODEL}'.")
        return None
|
85 |
+
|
86 |
+
|
87 |
+
# --- Helper: Format Single Result (for top display area) ---
|
88 |
+
def format_single_result(result_data, index, total_results):
    """
    Format one search hit as Markdown for the top preview area.

    Args:
        result_data: Dict with optional ``'metadata'``, ``'document'`` and
            ``'distance'`` entries. A falsy value yields a placeholder text.
        index: Position of the hit in the result list (currently unused).
        total_results: Size of the result list (currently unused).

    Returns:
        A Markdown string: a bullet list with author, book, optional section
        and distance, followed by the passage as a block quote.
    """
    if not result_data:
        return "No result data available."

    # A hit may carry ``'metadata': None``; ``.get('metadata', {})`` would hand
    # that None straight back, so fall back to an empty mapping explicitly.
    metadata = result_data.get('metadata') or {}
    doc = result_data.get('document', "N/A")
    distance = result_data.get('distance', float('inf'))

    lines = [
        f"* **Author:** {metadata.get('author', 'N/A')}",
        f"* **Book:** {metadata.get('book', 'N/A')}",
    ]
    # The section bullet is only shown when it carries real information.
    section = metadata.get('section', 'N/A')
    if section not in ('Unknown', 'N/A', None):
        lines.append(f"* **Section:** {section}")
    lines.append(f"* **Distance:** {distance:.4f}")

    return "\n".join(lines) + f"\n\n> {doc}\n\n"
|
109 |
+
|
110 |
+
# --- Helper: Format Reading Passage (Deprecated - formatting now done in format_context_markdown) ---
|
111 |
+
# def format_reading_passage(passage_data): # No longer needed as separate function
|
112 |
+
# ...
|
113 |
+
|
114 |
+
# --- Context Formatting Helper ---
|
115 |
+
def format_context_markdown(passages):
    """
    Join passage dictionaries into one continuous Markdown text (no header).

    Entries that are falsy or have no ``'id'`` are ignored. The rest are
    ordered by their integer ID. Entries whose role is ``'missing'`` are
    placeholders and contribute no text. A blank line is placed after a
    passage only when the entry directly following it is not a placeholder.

    Returns:
        The combined text, or an empty string when nothing is displayable.
    """
    if not passages:
        return ""

    ordered = sorted(
        (entry for entry in passages if entry and entry.get('id') is not None),
        key=lambda entry: int(entry.get('id', -1)),
    )

    pieces = []
    final_pos = len(ordered) - 1
    for pos, entry in enumerate(ordered):
        if entry.get('role', 'context') == 'missing':
            # Placeholder for "beginning/end of document": nothing to print.
            continue
        pieces.append(entry.get('doc', '_Passage text missing_'))
        if pos != final_pos and ordered[pos + 1].get('role') != 'missing':
            pieces.append("\n\n")

    return "".join(pieces)
|
146 |
+
|
147 |
+
# --- Search Function (Complete) ---
|
148 |
+
def search_philosophical_texts(query, selected_authors):
    """
    Embed ``query``, look up the nearest passages and prime the result browser.

    Up to ``MAX_RESULTS`` hits are requested from the Chroma collection,
    optionally restricted to ``selected_authors``. Hits whose ID cannot be
    converted to an integer are dropped. The remaining hits are stored in
    state and the first one is rendered in the preview area.

    Returns:
        A dict keyed by Gradio component / state object with the new value for
        each of them. Problems (no database, empty query, embedding or query
        failure) are reported as a message in the preview area, not raised.
    """
    def show_in_preview(text):
        # Status and error lines are shown through the preview group, which is
        # hidden by default and therefore has to be made visible as well.
        updates[single_result_display_md] = gr.Markdown(text)
        updates[single_result_group] = gr.Group(visible=True)

    # Start from a fully reset UI; the branches below override what they need.
    updates = {
        full_search_results_state: [],
        current_result_index_state: 0,
        single_result_group: gr.Group(visible=False),
        result_index_indicator_md: gr.Markdown(""),
        single_result_display_md: gr.Markdown(""),
        previous_result_button: gr.Button(visible=False),
        next_result_button: gr.Button(visible=False),
        weiterlesen_button: gr.Button(visible=False),
        context_display: gr.Markdown(""),
        displayed_context_passages: [],
        load_previous_button: gr.Button(visible=False),
        load_next_button: gr.Button(visible=False),
    }

    # --- Preconditions ---
    if collection is None:
        logging.error("Search attempted but ChromaDB collection is not available.")
        show_in_preview("Error: Database connection failed.")
        return updates

    if not query:
        logging.warning("Empty query received.")
        show_in_preview("Please enter a query.")
        return updates

    logging.info(f"Received query: '{query[:50]}...'")
    logging.info(f"Selected Authors for filtering: {selected_authors}")

    # --- Query embedding ---
    query_embedding = get_embedding(query, task="RETRIEVAL_QUERY")
    if query_embedding is None:
        logging.error("Failed to generate query embedding.")
        show_in_preview("Error: Failed to generate query embedding.")
        return updates

    # --- Optional author filter ---
    where_filter = {"author": {"$in": selected_authors}} if selected_authors else None
    if where_filter:
        logging.info(f"Applying WHERE filter: {where_filter}")

    # --- Query and result processing ---
    try:
        logging.info(f"Querying collection '{COLLECTION_NAME}' for top {MAX_RESULTS} results.")
        results = collection.query(
            query_embeddings=[query_embedding],
            n_results=MAX_RESULTS,
            where=where_filter,
            include=['documents', 'metadatas', 'distances'],  # IDs are included by default
        )

        hits = []
        if results and results.get('ids') and results['ids'][0]:
            logging.info(f"Query successful. Found {len(results['ids'][0])} results.")

            # One query embedding was sent, so only the first row of each field is used.
            hit_ids = results['ids'][0]
            hit_docs = results['documents'][0]
            hit_metas = results['metadatas'][0]
            hit_distances = results['distances'][0]

            for pos, hit_id in enumerate(hit_ids):
                # Other handlers convert IDs with int(); drop hits that would break them.
                try:
                    int(hit_id)
                except ValueError:
                    logging.warning(f"Skipping result with non-integer ID: {hit_id}")
                    continue

                hits.append({
                    "id": hit_id,
                    "document": hit_docs[pos],
                    "metadata": hit_metas[pos],
                    "distance": hit_distances[pos],
                })

            if hits:
                # Store all hits and show the first one with navigation controls.
                hit_count = len(hits)
                updates[full_search_results_state] = hits
                updates[current_result_index_state] = 0
                show_in_preview(format_single_result(hits[0], 0, hit_count))
                updates[result_index_indicator_md] = gr.Markdown(f"Result **1** of **{hit_count}**")
                updates[previous_result_button] = gr.Button(visible=True, interactive=False)
                updates[next_result_button] = gr.Button(visible=True, interactive=(hit_count > 1))
                updates[weiterlesen_button] = gr.Button(visible=True)
            else:
                # The query returned hits, but none survived the ID check.
                logging.info("No valid results found after filtering/validation.")
                show_in_preview("No results found matching your query and filters.")
                updates[weiterlesen_button] = gr.Button(visible=False)

        else:
            # The query itself came back empty.
            logging.info("No results found for the query (or matching the filter).")
            show_in_preview("No results found matching your query and filters.")
            updates[weiterlesen_button] = gr.Button(visible=False)

        return updates

    except Exception as e:
        logging.error(f"Error querying ChromaDB or processing results: {e}", exc_info=True)

        if "dimension" in str(e).lower():
            error_msg = "**Error:** Database search failed due to embedding mismatch. Please check configuration."
        else:
            # Only the exception type is shown to the user; details go to the log.
            error_msg = f"**Error:** An unexpected error occurred during search. See logs for details. ({type(e).__name__})"

        show_in_preview(error_msg)
        # Undo anything the try block may already have written.
        updates.update({
            full_search_results_state: [],
            current_result_index_state: 0,
            weiterlesen_button: gr.Button(visible=False),
            previous_result_button: gr.Button(visible=False),
            next_result_button: gr.Button(visible=False),
            result_index_indicator_md: gr.Markdown(""),
            context_display: gr.Markdown(""),
            displayed_context_passages: [],
            load_previous_button: gr.Button(visible=False),
            load_next_button: gr.Button(visible=False),
        })

        return updates
|
294 |
+
# --->>> END EXCEPTION HANDLING <<<---
|
295 |
+
|
296 |
+
|
297 |
+
# --- Result Navigation Function ---
|
298 |
+
def navigate_results(direction, current_index, full_results):
    """
    Step to the previous or next search hit in the top preview area.

    Args:
        direction: ``'previous'`` or ``'next'``; any other value keeps the index.
        current_index: Index of the hit currently shown.
        full_results: List of hit dicts held in state.

    Returns:
        A dict of component / state updates. When the index changes, the
        preview is re-rendered and the reading area is cleared. The two
        navigation buttons and the "weiterlesen" button are always refreshed.
        With no results in state only the index is reset to 0.
    """
    if not full_results:
        logging.warning("Navigate called with no results in state.")
        return {current_result_index_state: 0}

    total_results = len(full_results)
    last_index = total_results - 1

    if direction == 'previous':
        new_index = max(0, current_index - 1)
    elif direction == 'next':
        new_index = min(last_index, current_index + 1)
    else:
        new_index = current_index

    updates = {}
    moved = new_index != current_index
    if moved:
        logging.info(f"Navigating from result index {current_index} to {new_index}")
        updates.update({
            single_result_display_md: gr.Markdown(
                format_single_result(full_results[new_index], new_index, total_results)
            ),
            current_result_index_state: new_index,
            result_index_indicator_md: gr.Markdown(f"Result **{new_index + 1}** of **{total_results}**"),
            # A different hit is shown now, so the reading area is emptied.
            context_display: gr.Markdown(""),
            displayed_context_passages: [],
            load_previous_button: gr.Button(visible=False),
            load_next_button: gr.Button(visible=False),
            weiterlesen_button: gr.Button(visible=True),
        })

    # Button interactivity always reflects the index that is shown afterwards.
    updates[previous_result_button] = gr.Button(interactive=(new_index > 0))
    updates[next_result_button] = gr.Button(interactive=(new_index < last_index))

    if not moved:
        # Results exist at this point, so "weiterlesen" is offered in this case too.
        updates[weiterlesen_button] = gr.Button(visible=True)

    return updates
|
340 |
+
|
341 |
+
|
342 |
+
# --- Fetch Single Passage Helper ---
|
343 |
+
def fetch_passage_data(passage_id_int):
    """
    Look up one passage in the Chroma collection by its integer ID.

    The ID is converted to a string for the lookup.

    Returns:
        A dict with ``'id'``, ``'doc'`` and ``'meta'``, or ``None`` when the
        collection is unavailable, the ID is negative, nothing is stored
        under that ID, or the lookup raises (the error is logged).
    """
    if collection is None or passage_id_int < 0:
        return None

    try:
        passage_id_str = str(passage_id_int)
        found = collection.get(ids=[passage_id_str], include=['documents', 'metadatas'])

        if not (found and found.get('ids')):
            logging.info(f"Passage ID {passage_id_str} not found in collection.")
            return None

        documents = found.get('documents')
        metadatas = found.get('metadatas')
        return {
            'id': found['ids'][0],
            'doc': documents[0] if documents else "N/A",
            'meta': metadatas[0] if metadatas else {},
        }
    except Exception as e:
        logging.error(f"Error fetching passage ID {passage_id_int} from ChromaDB: {e}", exc_info=True)
        return None
|
362 |
+
|
363 |
+
|
364 |
+
# --- Move Passage to Reading Area ---
|
365 |
+
def move_to_reading_area(current_index, full_results):
    """
    Copy the text of the currently previewed hit into the reading area.

    On success the reading area shows the passage, the context state holds
    exactly that passage (role ``'current_reading'``), both context-loading
    buttons become visible and the "weiterlesen" button is hidden. The
    preview at the top is not touched.

    Returns:
        A dict of component / state updates. On any problem the reading area
        shows an error text and the "weiterlesen" button stays hidden.
    """
    updates = {
        context_display: gr.Markdown("_Loading reading passage..._"),
        displayed_context_passages: [],
        load_previous_button: gr.Button(visible=False),
        load_next_button: gr.Button(visible=False),
        weiterlesen_button: gr.Button(visible=False),
    }

    def abort(text):
        # Common exit for every failure: show the text, keep the button hidden.
        updates[context_display] = gr.Markdown(text)
        updates[weiterlesen_button] = gr.Button(visible=False)
        return updates

    if not full_results or not (0 <= current_index < len(full_results)):
        logging.warning(f"Attempted to move passage with invalid state or index. Index: {current_index}, Results Count: {len(full_results)}")
        return abort("Error: Could not load passage reference.")

    try:
        selected = full_results[current_index]
        # Search hits use 'document'/'metadata'; the reading area uses 'doc'/'meta'.
        reading_passage = {
            'id': selected.get('id'),
            'doc': selected.get('document'),
            'meta': selected.get('metadata'),
            'role': 'current_reading',
        }

        if not reading_passage['id'] or not reading_passage['doc']:
            logging.error(f"Cannot move passage: Missing ID or document in result at index {current_index}.")
            return abort("Error: Selected passage data is incomplete.")

        updates[context_display] = gr.Markdown(format_context_markdown([reading_passage]))
        updates[displayed_context_passages] = [reading_passage]
        updates[load_previous_button] = gr.Button(visible=True)
        updates[load_next_button] = gr.Button(visible=True)

        logging.info(f"Moved passage ID {reading_passage['id']} to reading area.")
        return updates

    except Exception as e:
        logging.error(f"Error moving passage for result index {current_index}: {e}", exc_info=True)
        return abort(f"Error moving passage to reading area: {e}")
|
417 |
+
|
418 |
+
|
419 |
+
# --- Load More Context Function ---
|
420 |
+
def load_more_context(direction, current_passages_state):
    """
    Add the passage directly before or after those shown in the reading area.

    The neighbour is looked up by integer ID (lowest ID - 1 for
    ``'previous'``, highest ID + 1 for ``'next'``). When no neighbour exists,
    a placeholder entry with role ``'missing'`` is kept at that end of the
    list instead. At most one placeholder sits at each end.

    Args:
        direction: ``'previous'`` or ``'next'``; other values only re-render.
        current_passages_state: List of passage dicts held in state. It is
            not modified; a new list is returned.

    Returns:
        ``(markdown, passages)``: the text for the reading area and the new
        state list. On failure the unchanged state is returned, with an error
        note appended to the text.
    """
    if collection is None:
        return "Error: Database connection failed.", current_passages_state
    if not current_passages_state:
        logging.warning("Load more context called with empty state.")
        return "_No reading passage loaded yet._", []

    try:
        # sorted() builds a new list, so the caller's state object is left
        # untouched. Doing it inside the try routes an unparsable ID to the
        # error handlers below instead of crashing the event handler.
        updated_passages = sorted(current_passages_state, key=lambda p: int(p.get('id', -1)))

        if direction == 'previous':
            earliest_id_str = updated_passages[0].get('id')
            if earliest_id_str is None:
                return format_context_markdown(updated_passages), updated_passages
            earliest_id_int = int(earliest_id_str)
            id_to_fetch = earliest_id_int - 1

            if id_to_fetch < 0:
                # Below ID 0 nothing can exist: make sure the '-1' placeholder is in front.
                if not (updated_passages[0].get('role') == 'missing' and updated_passages[0].get('id') == '-1'):
                    if updated_passages[0].get('role') == 'missing':
                        updated_passages.pop(0)
                    updated_passages.insert(0, {'id': '-1', 'role': 'missing', 'doc': '_(Beginning of document reached)_'})
            else:
                new_passage_data = fetch_passage_data(id_to_fetch)
                if new_passage_data:
                    new_passage_data['role'] = 'prev'
                    # A placeholder directly in front of the new passage is now obsolete.
                    if updated_passages[0].get('role') == 'missing' and updated_passages[0].get('id') == str(id_to_fetch + 1):
                        updated_passages.pop(0)
                    updated_passages.insert(0, new_passage_data)
                else:
                    # Nothing stored under that ID: record it as a placeholder (replacing an older one).
                    if not (updated_passages[0].get('role') == 'missing' and updated_passages[0].get('id') == str(id_to_fetch)):
                        if updated_passages[0].get('role') == 'missing':
                            updated_passages.pop(0)
                        updated_passages.insert(0, {'id': str(id_to_fetch), 'role': 'missing', 'doc': '_(Beginning of document reached)_'})

        elif direction == 'next':
            latest_id_str = updated_passages[-1].get('id')
            if latest_id_str is None:
                return format_context_markdown(updated_passages), updated_passages
            latest_id_int = int(latest_id_str)
            id_to_fetch = latest_id_int + 1

            new_passage_data = fetch_passage_data(id_to_fetch)
            if new_passage_data:
                new_passage_data['role'] = 'next'
                # A placeholder directly before the new passage is now obsolete.
                if updated_passages[-1].get('role') == 'missing' and updated_passages[-1].get('id') == str(id_to_fetch - 1):
                    updated_passages.pop(-1)
                updated_passages.append(new_passage_data)
            else:
                # Nothing stored under that ID: record it as a placeholder (replacing an older one).
                if not (updated_passages[-1].get('role') == 'missing' and updated_passages[-1].get('id') == str(id_to_fetch)):
                    if updated_passages[-1].get('role') == 'missing':
                        updated_passages.pop(-1)
                    updated_passages.append({'id': str(id_to_fetch), 'role': 'missing', 'doc': '_(End of document reached)_'})

        context_md = format_context_markdown(updated_passages)
        return context_md, updated_passages

    except ValueError:
        logging.error(f"Error converting passage ID to integer in load_more_context. State: {current_passages_state}", exc_info=True)
        error_message = _context_with_error(current_passages_state, "\n\n**Error processing context expansion.**")
        return error_message, current_passages_state
    except Exception as e:
        logging.error(f"Error loading more context (direction: {direction}): {e}", exc_info=True)
        error_message = _context_with_error(current_passages_state, f"\n\n**Error loading passage: {e}**")
        return error_message, current_passages_state


def _context_with_error(passages, note):
    """Render ``passages`` followed by ``note`` without raising on unparsable passage IDs."""
    try:
        rendered = format_context_markdown(passages)
    except (TypeError, ValueError):
        # The state that caused the error cannot be rendered either; show the note alone.
        rendered = ""
    return rendered + note
|
485 |
+
|
486 |
+
|
487 |
+
# --- Gradio UI Definition ---
|
488 |
+
# Builds the whole Gradio page. Components appear in the order they are
# created here, and the event wiring at the end connects them to the handler
# functions defined above.
with gr.Blocks(theme=gr.themes.Default()) as demo:
    gr.Markdown("# Philosophical Text Search & Context Explorer")

    # --- State Variables ---
    # All hits of the last search (list of dicts with id/document/metadata/distance).
    full_search_results_state = gr.State([])
    # Index into that list of the hit currently shown in the preview.
    current_result_index_state = gr.State(0)
    # Passages currently shown in the reading area (list of dicts with id/doc/meta/role).
    displayed_context_passages = gr.State([])

    # --- Search Input Row ---
    with gr.Row():
        query_input = gr.Textbox(label="Enter query", placeholder="z. B. 'Was ist der Unterschied zwischen Herstellen und Handeln?'", lines=2, scale=3)
        # Choices come from the author scan done at start-up; empty if that scan failed.
        author_dropdown = gr.Dropdown(
            label="Filter by Author(s) (Optional)",
            choices=unique_authors,
            multiselect=True,
            scale=2
        )
        search_button = gr.Button("Search", variant="primary", scale=1)

    # --- Result Navigation Row ---
    # Hidden until a search has produced at least one hit.
    with gr.Row():
        previous_result_button = gr.Button("⬅️", visible=False)
        next_result_button = gr.Button("➡️", visible=False)

    gr.Markdown("---") # Separator after search and navigation

    # --- Single Result Display Area ---
    # Contains the preview text and the "weiterlesen" button
    with gr.Column(visible=True) as results_area:
        # The group is also used to display status and error messages of a search.
        with gr.Group(visible=False) as single_result_group:
            result_index_indicator_md = gr.Markdown("Result 0 of 0")
            single_result_display_md = gr.Markdown("...") # Shows the preview
            # "weiterlesen" button remains at the end of the preview group
            weiterlesen_button = gr.Button("weiterlesen", variant="secondary", visible=True)

    gr.Markdown("---") # Separator before reading area

    # --- Context / Reading Area ---
    # The load buttons sit above and below the text and extend it in that direction.
    with gr.Column(visible=True) as context_area:
        load_previous_button = gr.Button("⬆️", variant="secondary", visible=False)
        context_display = gr.Markdown(label="Reading Area")
        load_next_button = gr.Button("⬇️", variant="secondary", visible=False)


    # --- Event Handlers ---

    # Search Button Action
    # These are the same components/states that search_philosophical_texts
    # uses as keys of the dict it returns.
    search_outputs = [
        full_search_results_state, current_result_index_state, single_result_group,
        result_index_indicator_md, single_result_display_md, previous_result_button,
        next_result_button, weiterlesen_button, context_display,
        displayed_context_passages, load_previous_button, load_next_button,
    ]
    search_button.click(
        fn=search_philosophical_texts,
        inputs=[query_input, author_dropdown],
        outputs=search_outputs
    )

    # Previous/Next Result Button Actions
    nav_outputs = [ # Combined list for prev/next
        single_result_display_md, current_result_index_state, result_index_indicator_md,
        previous_result_button, next_result_button, weiterlesen_button,
        context_display, displayed_context_passages,
        load_previous_button, load_next_button,
    ]
    # The direction is passed to the shared handler as a constant gr.State input.
    previous_result_button.click(
        fn=navigate_results,
        inputs=[gr.State('previous'), current_result_index_state, full_search_results_state],
        outputs=nav_outputs
    )
    next_result_button.click(
        fn=navigate_results,
        inputs=[gr.State('next'), current_result_index_state, full_search_results_state],
        outputs=nav_outputs
    )

    # "weiterlesen" Button Action
    weiterlesen_outputs = [
        context_display, displayed_context_passages,
        load_previous_button, load_next_button,
        weiterlesen_button # Target button itself to control visibility
    ]
    weiterlesen_button.click(
        fn=move_to_reading_area,
        inputs=[current_result_index_state, full_search_results_state],
        outputs=weiterlesen_outputs
    )

    # Load More Context Buttons
    # load_more_context returns a (markdown, passages) tuple matching these two outputs.
    load_previous_button.click(
        fn=load_more_context,
        inputs=[gr.State('previous'), displayed_context_passages],
        outputs=[context_display, displayed_context_passages]
    )
    load_next_button.click(
        fn=load_more_context,
        inputs=[gr.State('next'), displayed_context_passages],
        outputs=[context_display, displayed_context_passages]
    )
|
588 |
+
|
589 |
+
# --- Launch the Application ---
|
590 |
+
if __name__ == "__main__":
    # Report a degraded start-up on the console; the UI is launched either way.
    if collection is None:
        startup_notice = "\n--- ERROR: ChromaDB collection failed to load. UI might not function correctly. Check logs. ---\n"
    elif not unique_authors:
        startup_notice = "\n--- WARNING: No unique authors found in DB metadata. Author filter will be empty. ---\n"
    else:
        startup_notice = None
    if startup_notice is not None:
        print(startup_notice)

    print("Launching Gradio Interface...")
    # debug=True is kept on because it is helpful during testing.
    demo.launch(server_name="0.0.0.0", share=False, debug=True)
|