HonestAnnie committed on
Commit
71b065c
·
verified ·
1 Parent(s): 6c2cadb

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +598 -0
app.py ADDED
@@ -0,0 +1,598 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import chromadb
3
+ import google.generativeai as genai
4
+ import os
5
+ from dotenv import load_dotenv
6
+ import logging
7
+ import functools
8
+ from collections import defaultdict
9
+
10
# --- Configuration ---
# Timestamped, level-tagged log lines for the whole application.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Pull variables from a local .env file (if any) into the environment,
# then read the Gemini API key from there.
load_dotenv()
API_KEY = os.getenv("GEMINI_API_KEY")
if API_KEY:
    try:
        genai.configure(api_key=API_KEY)
        logging.info("Gemini API configured successfully.")
    except Exception as e:
        logging.error(f"Error configuring Gemini API: {e}")
else:
    # The app keeps running without a key; get_embedding() checks API_KEY again.
    logging.error("GEMINI_API_KEY not found in environment variables.")

# Chroma DB location and collection name
CHROMA_DB_PATH = "./chroma"
COLLECTION_NAME = "phil_de"

# Gemini embedding model.
# Must be the same model that produced the vectors stored in the DB
# (3072 dimensions are expected, judging by earlier errors).
EMBEDDING_MODEL = "models/gemini-embedding-exp-03-07"
logging.info(f"Using embedding model: {EMBEDDING_MODEL}")

# --- Constants ---
# Upper bound on the number of hits requested per search.
MAX_RESULTS = 20
36
+
37
# --- ChromaDB Connection and Author Fetching ---
# `collection` stays None and `unique_authors` stays empty if anything below fails;
# the rest of the module checks for that instead of crashing at import time.
collection = None
unique_authors = []
try:
    client = chromadb.PersistentClient(path=CHROMA_DB_PATH)
    collection = client.get_collection(name=COLLECTION_NAME)
    logging.info(f"Successfully connected to ChromaDB collection '{COLLECTION_NAME}'. Collection count: {collection.count()}")

    # Read every record's metadata once to build the author filter choices.
    logging.info("Fetching all metadata to extract unique authors...")
    all_metadata = collection.get(include=['metadatas'])
    if not (all_metadata and 'metadatas' in all_metadata and all_metadata['metadatas']):
        logging.warning("Could not retrieve metadata or no metadata found to extract authors.")
    else:
        authors_set = set()
        for meta in all_metadata['metadatas']:
            # Skip records without metadata or with a missing/empty author.
            if not meta or 'author' not in meta or not meta['author']:
                continue
            authors_set.add(meta['author'])
        unique_authors = sorted(authors_set)
        logging.info(f"Found {len(unique_authors)} unique authors.")

except Exception as e:
    logging.critical(f"FATAL: Could not connect to Chroma DB or fetch authors: {e}", exc_info=True)
    unique_authors = []
60
+
61
+ # --- Embedding Function ---
62
def get_embedding(text, task="RETRIEVAL_QUERY"):
    """
    Request an embedding for `text` from the configured Gemini model.

    Args:
        text: The string to embed.
        task: Value passed through as `task_type` to `genai.embed_content`.

    Returns:
        The value stored under 'embedding' in the API response, or None when
        no API key is configured, `text` is empty, or the API call raises.
    """
    if not API_KEY:
        logging.error("Cannot generate embedding: API key not configured.")
        return None
    if not text:
        logging.warning("Embedding requested for empty text.")
        return None

    try:
        logging.info(f"Generating embedding for task: {task}")
        response = genai.embed_content(
            model=EMBEDDING_MODEL,
            content=text,
            task_type=task,
        )
        logging.info("Embedding generated successfully.")
        return response['embedding']
    except Exception as e:
        logging.error(f"Error generating Gemini embedding: {e}", exc_info=True)
        # Add a more specific hint for the two failure modes recognisable
        # from the exception text.
        reason = str(e).lower()
        model_problem = "model" in reason and ("not found" in reason or "permission" in reason)
        if model_problem:
            logging.error(f"The configured embedding model '{EMBEDDING_MODEL}' might be incorrect, unavailable, or lack permissions.")
        elif "dimension" in reason:
            logging.error(f"Potential dimension mismatch issue with model '{EMBEDDING_MODEL}'.")
        return None
85
+
86
+
87
+ # --- Helper: Format Single Result (for top display area) ---
88
def format_single_result(result_data, index, total_results):
    """
    Format one search result as Markdown for the top preview area.

    Args:
        result_data: Dict with optional keys 'metadata' (dict or None),
            'document' (passage text) and 'distance' (number or None).
        index: Position of the result; accepted for interface
            compatibility, not used in the output.
        total_results: Total number of results; accepted for interface
            compatibility, not used in the output.

    Returns:
        A Markdown string with author, book, optional section and distance
        bullets followed by the passage as a block quote, or a short notice
        when `result_data` is empty.
    """
    if not result_data:
        return "No result data available."

    # The 'metadata' key may be present but None; `or {}` keeps the
    # lookups below from raising AttributeError in that case.
    metadata = result_data.get('metadata') or {}
    doc = result_data.get('document', "N/A")

    # A None distance cannot be formatted with ':.4f'; fall back to the same
    # value used when the key is absent.
    distance = result_data.get('distance')
    if distance is None:
        distance = float('inf')

    author = metadata.get('author', 'N/A')
    book = metadata.get('book', 'N/A')
    section = metadata.get('section', 'N/A')

    parts = [
        f"* **Author:** {author}\n",
        f"* **Book:** {book}\n",
    ]
    # Only show the section bullet when it carries real information.
    if section not in ('Unknown', 'N/A', None):
        parts.append(f"* **Section:** {section}\n")
    parts.append(f"* **Distance:** {distance:.4f}\n\n")
    parts.append(f"> {doc}\n\n")
    return "".join(parts)
109
+
110
+ # --- Helper: Format Reading Passage (Deprecated - formatting now done in format_context_markdown) ---
111
+ # def format_reading_passage(passage_data): # No longer needed as separate function
112
+ # ...
113
+
114
+ # --- Context Formatting Helper ---
115
def format_context_markdown(passages):
    """
    Join passage dictionaries into one continuous Markdown string for the
    reading area (no header is added).

    Entries that are falsy or have no 'id' are dropped, the rest are ordered
    by integer 'id', and entries whose 'role' is 'missing' contribute no text.
    A blank line is placed after a passage only when the entry directly after
    it in that order is not a 'missing' placeholder.

    Args:
        passages: List of dicts with keys 'id', 'doc' and optionally 'role'.

    Returns:
        The combined text, or "" when there is nothing to show.
    """
    if not passages:
        return ""

    ordered = sorted(
        (entry for entry in passages if entry and entry.get('id') is not None),
        key=lambda entry: int(entry.get('id', -1)),
    )
    if not ordered:
        return ""

    final_pos = len(ordered) - 1
    pieces = []
    for pos, entry in enumerate(ordered):
        # Placeholders such as "Beginning/End of document" are never rendered.
        if entry.get('role', 'context') == 'missing':
            continue

        pieces.append(entry.get('doc', '_Passage text missing_'))

        # Separator only between a passage and a real (non-placeholder) successor.
        has_successor = pos < final_pos
        if has_successor and ordered[pos + 1].get('role') != 'missing':
            pieces.append("\n\n")

    return "".join(pieces)
146
+
147
+ # --- Search Function (Complete) ---
148
def search_philosophical_texts(query, selected_authors):
    """
    Embed the query, search the Chroma collection and prepare the UI updates.

    All hits are stored in the results state and the first one is rendered in
    the preview area. Failures are reported as a message in the preview area.

    Args:
        query: Text entered by the user.
        selected_authors: List of author names used as a metadata filter;
            an empty/None value disables filtering.

    Returns:
        A dict mapping Gradio components and state objects to their new values.
    """
    # Baseline: everything cleared and hidden; branches below override as needed.
    updates = {
        full_search_results_state: [],
        current_result_index_state: 0,
        single_result_group: gr.Group(visible=False),
        result_index_indicator_md: gr.Markdown(""),
        single_result_display_md: gr.Markdown(""),
        previous_result_button: gr.Button(visible=False),
        next_result_button: gr.Button(visible=False),
        weiterlesen_button: gr.Button(visible=False),
        context_display: gr.Markdown(""),
        displayed_context_passages: [],
        load_previous_button: gr.Button(visible=False),
        load_next_button: gr.Button(visible=False),
    }

    def show_message(text):
        """Put `text` into the preview area, reveal its group, return updates."""
        updates[single_result_display_md] = gr.Markdown(text)
        updates[single_result_group] = gr.Group(visible=True)
        return updates

    # --- Pre-computation checks ---
    if collection is None:
        logging.error("Search attempted but ChromaDB collection is not available.")
        return show_message("Error: Database connection failed.")

    if not query:
        logging.warning("Empty query received.")
        return show_message("Please enter a query.")

    logging.info(f"Received query: '{query[:50]}...'")
    logging.info(f"Selected Authors for filtering: {selected_authors}")

    # --- Embedding ---
    query_embedding = get_embedding(query, task="RETRIEVAL_QUERY")
    if query_embedding is None:
        logging.error("Failed to generate query embedding.")
        return show_message("Error: Failed to generate query embedding.")

    # --- Filtering ---
    where_filter = {"author": {"$in": selected_authors}} if selected_authors else None
    if where_filter is not None:
        logging.info(f"Applying WHERE filter: {where_filter}")

    # --- Query execution and result processing ---
    try:
        logging.info(f"Querying collection '{COLLECTION_NAME}' for top {MAX_RESULTS} results.")

        results = collection.query(
            query_embeddings=[query_embedding],
            n_results=MAX_RESULTS,
            where=where_filter,
            include=['documents', 'metadatas', 'distances'],  # IDs are included by default
        )

        got_hits = results and results.get('ids') and results['ids'][0]
        if not got_hits:
            logging.info("No results found for the query (or matching the filter).")
            updates[weiterlesen_button] = gr.Button(visible=False)
            return show_message("No results found matching your query and filters.")

        ids_list = results['ids'][0]
        logging.info(f"Query successful. Found {len(ids_list)} results.")
        docs_list = results['documents'][0]
        metadatas_list = results['metadatas'][0]
        distances_list = results['distances'][0]

        all_results_data = []
        for pos, raw_id in enumerate(ids_list):
            # Context loading later does integer arithmetic on IDs, so hits
            # whose ID is not an integer string are dropped here.
            try:
                int(raw_id)
            except ValueError:
                logging.warning(f"Skipping result with non-integer ID: {raw_id}")
                continue

            all_results_data.append({
                "id": raw_id,
                "document": docs_list[pos],
                "metadata": metadatas_list[pos],
                "distance": distances_list[pos],
            })

        if not all_results_data:
            # The query returned hits, but none survived validation.
            logging.info("No valid results found after filtering/validation.")
            updates[weiterlesen_button] = gr.Button(visible=False)
            return show_message("No results found matching your query and filters.")

        hit_count = len(all_results_data)
        updates[full_search_results_state] = all_results_data
        updates[current_result_index_state] = 0
        show_message(format_single_result(all_results_data[0], 0, hit_count))
        updates[result_index_indicator_md] = gr.Markdown(f"Result **1** of **{hit_count}**")
        # On the first hit there is nothing to go back to.
        updates[previous_result_button] = gr.Button(visible=True, interactive=False)
        updates[next_result_button] = gr.Button(visible=True, interactive=(hit_count > 1))
        updates[weiterlesen_button] = gr.Button(visible=True)
        return updates

    except Exception as e:
        logging.error(f"Error querying ChromaDB or processing results: {e}", exc_info=True)

        if "dimension" in str(e).lower():
            error_msg = "**Error:** Database search failed due to embedding mismatch. Please check configuration."
        else:
            # Only the exception type is shown to the user; details go to the log.
            error_msg = f"**Error:** An unexpected error occurred during search. See logs for details. ({type(e).__name__})"

        show_message(error_msg)
        # Reset every piece of state and hide every control.
        updates.update({
            full_search_results_state: [],
            current_result_index_state: 0,
            weiterlesen_button: gr.Button(visible=False),
            previous_result_button: gr.Button(visible=False),
            next_result_button: gr.Button(visible=False),
            result_index_indicator_md: gr.Markdown(""),
            context_display: gr.Markdown(""),
            displayed_context_passages: [],
            load_previous_button: gr.Button(visible=False),
            load_next_button: gr.Button(visible=False),
        })
        return updates
295
+
296
+
297
+ # --- Result Navigation Function ---
298
def navigate_results(direction, current_index, full_results):
    """
    Step to the previous or next search result in the top preview area.

    Args:
        direction: 'previous' or 'next'; any other value leaves the index as is.
        current_index: Index of the result currently shown.
        full_results: List of result dicts held in state.

    Returns:
        A dict mapping Gradio components and state objects to their new values.
        With no results in state, only the index state (reset to 0) is returned.
    """
    if not full_results:
        logging.warning("Navigate called with no results in state.")
        return {current_result_index_state: 0}

    total_results = len(full_results)
    last_index = total_results - 1

    # Clamp the step to the valid index range.
    if direction == 'previous':
        target_index = max(0, current_index - 1)
    elif direction == 'next':
        target_index = min(last_index, current_index + 1)
    else:
        target_index = current_index

    moved = target_index != current_index
    updates = {}

    if moved:
        logging.info(f"Navigating from result index {current_index} to {target_index}")
        preview_md = format_single_result(full_results[target_index], target_index, total_results)
        updates[single_result_display_md] = gr.Markdown(preview_md)
        updates[current_result_index_state] = target_index
        updates[result_index_indicator_md] = gr.Markdown(f"Result **{target_index + 1}** of **{total_results}**")
        # A new result invalidates whatever was loaded in the reading area.
        updates[context_display] = gr.Markdown("")
        updates[displayed_context_passages] = []
        updates[load_previous_button] = gr.Button(visible=False)
        updates[load_next_button] = gr.Button(visible=False)
        updates[weiterlesen_button] = gr.Button(visible=True)

    # Button interactivity always reflects the index now being shown.
    updates[previous_result_button] = gr.Button(interactive=(target_index > 0))
    updates[next_result_button] = gr.Button(interactive=(target_index < last_index))

    if not moved:
        # Results exist at this point, so the "weiterlesen" button is made
        # visible on any navigation click, even when the index stayed put.
        updates[weiterlesen_button] = gr.Button(visible=True)

    return updates
340
+
341
+
342
+ # --- Fetch Single Passage Helper ---
343
def fetch_passage_data(passage_id_int):
    """
    Look up a single passage in the Chroma collection by integer ID.

    Args:
        passage_id_int: Integer ID; it is converted to a string for the lookup.

    Returns:
        A dict with keys 'id', 'doc' and 'meta', or None when the collection
        is unavailable, the ID is negative, nothing is found, or the lookup raises.
    """
    if collection is None or passage_id_int < 0:
        return None

    try:
        passage_id_str = str(passage_id_int)
        result = collection.get(ids=[passage_id_str], include=['documents', 'metadatas'])

        if not (result and result.get('ids')):
            logging.info(f"Passage ID {passage_id_str} not found in collection.")
            return None

        documents = result.get('documents')
        metadatas = result.get('metadatas')
        return {
            'id': result['ids'][0],
            'doc': documents[0] if documents else "N/A",
            'meta': metadatas[0] if metadatas else {},
        }
    except Exception as e:
        logging.error(f"Error fetching passage ID {passage_id_int} from ChromaDB: {e}", exc_info=True)
        return None
362
+
363
+
364
+ # --- Move Passage to Reading Area ---
365
def move_to_reading_area(current_index, full_results):
    """
    Copy the currently previewed result into the reading area below.

    The metadata preview in the top area is left untouched. On success the
    'weiterlesen' button is hidden and the load-previous/next buttons are shown.

    Args:
        current_index: Index of the selected result within `full_results`.
        full_results: List of result dicts (keys 'id', 'document', 'metadata').

    Returns:
        A dict mapping Gradio components and state objects to their new values.
        On any failure the reading area shows an error message instead.
    """
    updates = {
        context_display: gr.Markdown("_Loading reading passage..._"),
        displayed_context_passages: [],
        load_previous_button: gr.Button(visible=False),
        load_next_button: gr.Button(visible=False),
        weiterlesen_button: gr.Button(visible=False),
    }

    # Compute the count defensively: the guard below must be able to report an
    # invalid state (None results / None index) without raising itself.
    result_count = len(full_results) if full_results else 0
    index_is_valid = isinstance(current_index, int) and 0 <= current_index < result_count
    if not index_is_valid:
        logging.warning(f"Attempted to move passage with invalid state or index. Index: {current_index}, Results Count: {result_count}")
        updates[context_display] = gr.Markdown("Error: Could not load passage reference.")
        return updates

    try:
        target_result_data = full_results[current_index]
        # Re-key the search hit into the shape used by the context helpers.
        reading_passage_state_data = {
            'id': target_result_data.get('id'),
            'doc': target_result_data.get('document'),
            'meta': target_result_data.get('metadata'),
            'role': 'current_reading',
        }

        if not reading_passage_state_data['id'] or not reading_passage_state_data['doc']:
            logging.error(f"Cannot move passage: Missing ID or document in result at index {current_index}.")
            updates[context_display] = gr.Markdown("Error: Selected passage data is incomplete.")
            return updates

        formatted_passage_md = format_context_markdown([reading_passage_state_data])

        updates[context_display] = gr.Markdown(formatted_passage_md)
        updates[displayed_context_passages] = [reading_passage_state_data]
        updates[load_previous_button] = gr.Button(visible=True)
        updates[load_next_button] = gr.Button(visible=True)

        logging.info(f"Moved passage ID {reading_passage_state_data['id']} to reading area.")
        return updates

    except Exception as e:
        logging.error(f"Error moving passage for result index {current_index}: {e}", exc_info=True)
        updates[context_display] = gr.Markdown(f"Error moving passage to reading area: {e}")
        return updates
417
+
418
+
419
+ # --- Load More Context Function ---
420
def load_more_context(direction, current_passages_state):
    """
    Extend the reading area by one passage before or after what is shown.

    The neighbouring passage is looked up by integer ID (lowest ID - 1 for
    'previous', highest ID + 1 for 'next'). When it cannot be fetched, a
    placeholder entry with role 'missing' is stored in its place.

    Args:
        direction: 'previous' or 'next'.
        current_passages_state: List of passage dicts currently displayed.
            Note: this list is sorted in place.

    Returns:
        A tuple (markdown_text, passages_list) for the reading area display
        and the context state.
    """
    if collection is None:
        return "Error: Database connection failed.", current_passages_state
    if not current_passages_state:
        logging.warning("Load more context called with empty state.")
        return "_No reading passage loaded yet._", []

    current_passages_state.sort(key=lambda p: int(p.get('id', -1)))
    updated_passages = list(current_passages_state)

    try:
        if direction == 'previous':
            first_id = updated_passages[0].get('id')
            if first_id is None:
                return format_context_markdown(updated_passages), updated_passages
            wanted_id = int(first_id) - 1

            if wanted_id < 0:
                # Nothing exists before ID 0: make sure a single '-1' placeholder leads the list.
                head = updated_passages[0]
                head_is_placeholder = head.get('role') == 'missing'
                if not head_is_placeholder or head.get('id') != '-1':
                    if head_is_placeholder:
                        updated_passages.pop(0)
                    updated_passages.insert(0, {'id': '-1', 'role': 'missing', 'doc': '_(Beginning of document reached)_'})
            else:
                fetched = fetch_passage_data(wanted_id)
                head = updated_passages[0]
                head_is_placeholder = head.get('role') == 'missing'
                if fetched:
                    fetched['role'] = 'prev'
                    # Replace a placeholder sitting directly after the fetched ID.
                    if head_is_placeholder and head.get('id') == str(wanted_id + 1):
                        updated_passages.pop(0)
                    updated_passages.insert(0, fetched)
                elif not head_is_placeholder or head.get('id') != str(wanted_id):
                    if head_is_placeholder:
                        updated_passages.pop(0)
                    updated_passages.insert(0, {'id': str(wanted_id), 'role': 'missing', 'doc': '_(Beginning of document reached)_'})

        elif direction == 'next':
            last_id = updated_passages[-1].get('id')
            if last_id is None:
                return format_context_markdown(updated_passages), updated_passages
            wanted_id = int(last_id) + 1

            fetched = fetch_passage_data(wanted_id)
            tail = updated_passages[-1]
            tail_is_placeholder = tail.get('role') == 'missing'
            if fetched:
                fetched['role'] = 'next'
                # Replace a placeholder sitting directly before the fetched ID.
                if tail_is_placeholder and tail.get('id') == str(wanted_id - 1):
                    updated_passages.pop(-1)
                updated_passages.append(fetched)
            elif not tail_is_placeholder or tail.get('id') != str(wanted_id):
                if tail_is_placeholder:
                    updated_passages.pop(-1)
                updated_passages.append({'id': str(wanted_id), 'role': 'missing', 'doc': '_(End of document reached)_'})

        return format_context_markdown(updated_passages), updated_passages

    except ValueError:
        logging.error(f"Error converting passage ID to integer in load_more_context. State: {current_passages_state}", exc_info=True)
        error_message = format_context_markdown(current_passages_state) + "\n\n**Error processing context expansion.**"
        return error_message, current_passages_state
    except Exception as e:
        logging.error(f"Error loading more context (direction: {direction}): {e}", exc_info=True)
        error_message = format_context_markdown(current_passages_state) + f"\n\n**Error loading passage: {e}**"
        return error_message, current_passages_state
485
+
486
+
487
+ # --- Gradio UI Definition ---
488
with gr.Blocks(theme=gr.themes.Default()) as demo:
    gr.Markdown("# Philosophical Text Search & Context Explorer")

    # --- Per-session state ---
    full_search_results_state = gr.State([])   # all hits of the last search
    current_result_index_state = gr.State(0)   # index of the hit being previewed
    displayed_context_passages = gr.State([])  # passages shown in the reading area

    # --- Search input row ---
    with gr.Row():
        query_input = gr.Textbox(
            label="Enter query",
            placeholder="z. B. 'Was ist der Unterschied zwischen Herstellen und Handeln?'",
            lines=2,
            scale=3,
        )
        author_dropdown = gr.Dropdown(
            label="Filter by Author(s) (Optional)",
            choices=unique_authors,
            multiselect=True,
            scale=2,
        )
        search_button = gr.Button("Search", variant="primary", scale=1)

    # --- Result navigation row ---
    with gr.Row():
        previous_result_button = gr.Button("⬅️", visible=False)
        next_result_button = gr.Button("➡️", visible=False)

    gr.Markdown("---")  # separator after search and navigation

    # --- Single result display area: preview text plus the "weiterlesen" button ---
    with gr.Column(visible=True) as results_area:
        with gr.Group(visible=False) as single_result_group:
            result_index_indicator_md = gr.Markdown("Result 0 of 0")
            single_result_display_md = gr.Markdown("...")
            weiterlesen_button = gr.Button("weiterlesen", variant="secondary", visible=True)

    gr.Markdown("---")  # separator before reading area

    # --- Context / reading area ---
    with gr.Column(visible=True) as context_area:
        load_previous_button = gr.Button("⬆️", variant="secondary", visible=False)
        context_display = gr.Markdown(label="Reading Area")
        load_next_button = gr.Button("⬇️", variant="secondary", visible=False)

    # --- Event wiring ---

    # Search: may touch every dynamic component and all three state objects.
    search_outputs = [
        full_search_results_state,
        current_result_index_state,
        single_result_group,
        result_index_indicator_md,
        single_result_display_md,
        previous_result_button,
        next_result_button,
        weiterlesen_button,
        context_display,
        displayed_context_passages,
        load_previous_button,
        load_next_button,
    ]
    search_button.click(
        fn=search_philosophical_texts,
        inputs=[query_input, author_dropdown],
        outputs=search_outputs,
    )

    # Previous/next result: both buttons share one handler and one output list;
    # the direction is supplied through a constant State input.
    nav_outputs = [
        single_result_display_md,
        current_result_index_state,
        result_index_indicator_md,
        previous_result_button,
        next_result_button,
        weiterlesen_button,
        context_display,
        displayed_context_passages,
        load_previous_button,
        load_next_button,
    ]
    for _nav_button, _nav_direction in (
        (previous_result_button, 'previous'),
        (next_result_button, 'next'),
    ):
        _nav_button.click(
            fn=navigate_results,
            inputs=[gr.State(_nav_direction), current_result_index_state, full_search_results_state],
            outputs=nav_outputs,
        )

    # "weiterlesen": the button is among its own outputs so the handler can hide it.
    weiterlesen_outputs = [
        context_display,
        displayed_context_passages,
        load_previous_button,
        load_next_button,
        weiterlesen_button,
    ]
    weiterlesen_button.click(
        fn=move_to_reading_area,
        inputs=[current_result_index_state, full_search_results_state],
        outputs=weiterlesen_outputs,
    )

    # Load more context above / below the reading passage.
    for _ctx_button, _ctx_direction in (
        (load_previous_button, 'previous'),
        (load_next_button, 'next'),
    ):
        _ctx_button.click(
            fn=load_more_context,
            inputs=[gr.State(_ctx_direction), displayed_context_passages],
            outputs=[context_display, displayed_context_passages],
        )
588
+
589
+ # --- Launch the Application ---
590
if __name__ == "__main__":
    # Surface startup problems on stdout before the server comes up.
    if collection is not None:
        if not unique_authors:
            print("\n--- WARNING: No unique authors found in DB metadata. Author filter will be empty. ---\n")
    else:
        print("\n--- ERROR: ChromaDB collection failed to load. UI might not function correctly. Check logs. ---\n")

    print("Launching Gradio Interface...")
    # Binds to all interfaces, no public share link, debug mode on.
    demo.launch(
        server_name="0.0.0.0",
        share=False,
        debug=True,
    )