Spaces:
Sleeping
Sleeping
Nagesh Muralidhar
committed on
Commit
·
4972b4c
1
Parent(s):
6c71315
midterm-submission
Browse files
- podcraft/src/pages/PodcastForm.tsx +19 -12
- server/main.py +21 -11
podcraft/src/pages/PodcastForm.tsx
CHANGED
@@ -44,7 +44,10 @@ const PodcastForm: React.FC = () => {
|
|
44 |
useEffect(() => {
|
45 |
const fetchPodcastAndContext = async () => {
|
46 |
try {
|
47 |
-
if (!id)
|
|
|
|
|
|
|
48 |
|
49 |
// Fetch podcast details
|
50 |
const response = await fetch(`${API_URL}/api/audio-list`);
|
@@ -69,7 +72,7 @@ const PodcastForm: React.FC = () => {
|
|
69 |
const category = categoryWithExt.replace('.mp3', '');
|
70 |
|
71 |
return {
|
72 |
-
id: index + 1,
|
73 |
title: descriptionPart.replace(/_/g, ' ').replace(/^\w/, c => c.toUpperCase()),
|
74 |
description: `A debate exploring ${queryPart.replace(/_/g, ' ')}`,
|
75 |
audio_file: `${API_URL}${file.path}`,
|
@@ -79,15 +82,18 @@ const PodcastForm: React.FC = () => {
|
|
79 |
});
|
80 |
|
81 |
const selectedPodcast = podcastList.find(p => p.id === parseInt(id));
|
82 |
-
if (selectedPodcast) {
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
|
|
|
|
|
|
91 |
}
|
92 |
} catch (err) {
|
93 |
console.error('Error fetching podcast:', err);
|
@@ -124,7 +130,8 @@ const PodcastForm: React.FC = () => {
|
|
124 |
});
|
125 |
|
126 |
if (!response.ok) {
|
127 |
-
|
|
|
128 |
}
|
129 |
|
130 |
const data = await response.json();
|
|
|
44 |
useEffect(() => {
|
45 |
const fetchPodcastAndContext = async () => {
|
46 |
try {
|
47 |
+
if (!id) {
|
48 |
+
setError("No podcast ID provided");
|
49 |
+
return;
|
50 |
+
}
|
51 |
|
52 |
// Fetch podcast details
|
53 |
const response = await fetch(`${API_URL}/api/audio-list`);
|
|
|
72 |
const category = categoryWithExt.replace('.mp3', '');
|
73 |
|
74 |
return {
|
75 |
+
id: index + 1, // Use 1-based index for consistency
|
76 |
title: descriptionPart.replace(/_/g, ' ').replace(/^\w/, c => c.toUpperCase()),
|
77 |
description: `A debate exploring ${queryPart.replace(/_/g, ' ')}`,
|
78 |
audio_file: `${API_URL}${file.path}`,
|
|
|
82 |
});
|
83 |
|
84 |
const selectedPodcast = podcastList.find(p => p.id === parseInt(id));
|
85 |
+
if (!selectedPodcast) {
|
86 |
+
throw new Error(`Podcast with ID ${id} not found`);
|
87 |
+
}
|
88 |
+
setPodcast(selectedPodcast);
|
89 |
+
|
90 |
+
// Fetch podcast context
|
91 |
+
const contextResponse = await fetch(`${API_URL}/api/podcast/${id}/context`);
|
92 |
+
if (contextResponse.ok) {
|
93 |
+
const contextData: PodcastContext = await contextResponse.json();
|
94 |
+
setPodcastContext(contextData);
|
95 |
+
} else {
|
96 |
+
console.warn(`Could not fetch context for podcast ${id}`);
|
97 |
}
|
98 |
} catch (err) {
|
99 |
console.error('Error fetching podcast:', err);
|
|
|
130 |
});
|
131 |
|
132 |
if (!response.ok) {
|
133 |
+
const errorData = await response.text();
|
134 |
+
throw new Error(`Server error: ${response.status} ${errorData}`);
|
135 |
}
|
136 |
|
137 |
const data = await response.json();
|
server/main.py
CHANGED
@@ -333,7 +333,7 @@ async def get_podcast_context(podcast_id: str):
|
|
333 |
logger.error(f"Error in get_podcast_context: {str(e)}", exc_info=True)
|
334 |
raise HTTPException(status_code=500, detail=str(e))
|
335 |
|
336 |
-
@
|
337 |
async def podcast_chat(podcast_id: str, request: PodcastChatRequest):
|
338 |
"""Handle chat messages for a specific podcast."""
|
339 |
try:
|
@@ -342,34 +342,39 @@ async def podcast_chat(podcast_id: str, request: PodcastChatRequest):
|
|
342 |
# Path to transcripts file
|
343 |
transcripts_file = os.path.join(os.path.dirname(__file__), "transcripts", "podcasts.json")
|
344 |
|
345 |
-
# Check if transcripts file exists
|
346 |
if not os.path.exists(transcripts_file):
|
347 |
-
logger.
|
348 |
-
|
349 |
-
json.dump([], f)
|
350 |
-
raise HTTPException(status_code=404, detail="No transcript available for this podcast yet")
|
351 |
|
352 |
# Read transcripts
|
353 |
try:
|
354 |
with open(transcripts_file, 'r') as f:
|
355 |
transcripts = json.load(f)
|
|
|
356 |
except json.JSONDecodeError as e:
|
357 |
-
logger.error(f"Error
|
358 |
-
raise HTTPException(status_code=500, detail="Error reading
|
359 |
|
360 |
# Convert podcast_id to zero-based index
|
361 |
try:
|
362 |
podcast_index = int(podcast_id) - 1
|
363 |
if podcast_index < 0 or podcast_index >= len(transcripts):
|
|
|
364 |
raise ValueError(f"Invalid podcast ID: {podcast_id}")
|
365 |
except ValueError as e:
|
|
|
366 |
raise HTTPException(status_code=404, detail=str(e))
|
367 |
|
368 |
# Get podcast transcript
|
369 |
try:
|
370 |
podcast_transcript = transcripts[podcast_index].get("podcastScript")
|
371 |
if not podcast_transcript:
|
|
|
372 |
raise HTTPException(status_code=404, detail="No transcript content found for this podcast")
|
|
|
|
|
|
|
373 |
except (IndexError, KeyError) as e:
|
374 |
logger.error(f"Error accessing podcast transcript: {str(e)}")
|
375 |
raise HTTPException(status_code=404, detail="Transcript not found for this podcast")
|
@@ -383,8 +388,10 @@ async def podcast_chat(podcast_id: str, request: PodcastChatRequest):
|
|
383 |
|
384 |
# Use split_text for strings instead of split_documents
|
385 |
chunks = text_splitter.split_text(podcast_transcript)
|
|
|
386 |
|
387 |
if not chunks:
|
|
|
388 |
raise HTTPException(status_code=404, detail="No content chunks found in transcript")
|
389 |
|
390 |
# Initialize embedding model
|
@@ -400,6 +407,7 @@ async def podcast_chat(podcast_id: str, request: PodcastChatRequest):
|
|
400 |
location=":memory:", # Use in-memory storage
|
401 |
collection_name=collection_name
|
402 |
)
|
|
|
403 |
|
404 |
# Configure the retriever with search parameters
|
405 |
qdrant_retriever = vectorstore.as_retriever(
|
@@ -428,7 +436,8 @@ async def podcast_chat(podcast_id: str, request: PodcastChatRequest):
|
|
428 |
|
429 |
# Add logging for the retrieved documents and final prompt
|
430 |
def get_context_and_log(input_dict):
|
431 |
-
|
|
|
432 |
logger.info("Retrieved context from podcast:")
|
433 |
logger.info("-" * 50)
|
434 |
logger.info(f"Context:\n{context}")
|
@@ -447,11 +456,12 @@ async def podcast_chat(podcast_id: str, request: PodcastChatRequest):
|
|
447 |
|
448 |
# Get response
|
449 |
response = chain.invoke({"question": request.message})
|
|
|
450 |
|
451 |
return PodcastChatResponse(response=response.content)
|
452 |
|
453 |
-
except HTTPException
|
454 |
-
raise
|
455 |
except Exception as e:
|
456 |
logger.error(f"Error in podcast chat: {str(e)}", exc_info=True)
|
457 |
raise HTTPException(status_code=500, detail=str(e))
|
|
|
333 |
logger.error(f"Error in get_podcast_context: {str(e)}", exc_info=True)
|
334 |
raise HTTPException(status_code=500, detail=str(e))
|
335 |
|
336 |
+
@app.post("/podcast-chat/{podcast_id}", response_model=PodcastChatResponse)
|
337 |
async def podcast_chat(podcast_id: str, request: PodcastChatRequest):
|
338 |
"""Handle chat messages for a specific podcast."""
|
339 |
try:
|
|
|
342 |
# Path to transcripts file
|
343 |
transcripts_file = os.path.join(os.path.dirname(__file__), "transcripts", "podcasts.json")
|
344 |
|
345 |
+
# Check if transcripts file exists
|
346 |
if not os.path.exists(transcripts_file):
|
347 |
+
logger.error("Transcripts file not found")
|
348 |
+
raise HTTPException(status_code=404, detail="Transcripts file not found")
|
|
|
|
|
349 |
|
350 |
# Read transcripts
|
351 |
try:
|
352 |
with open(transcripts_file, 'r') as f:
|
353 |
transcripts = json.load(f)
|
354 |
+
logger.info(f"Loaded {len(transcripts)} transcripts")
|
355 |
except json.JSONDecodeError as e:
|
356 |
+
logger.error(f"Error decoding transcripts file: {str(e)}")
|
357 |
+
raise HTTPException(status_code=500, detail="Error reading transcripts file")
|
358 |
|
359 |
# Convert podcast_id to zero-based index
|
360 |
try:
|
361 |
podcast_index = int(podcast_id) - 1
|
362 |
if podcast_index < 0 or podcast_index >= len(transcripts):
|
363 |
+
logger.error(f"Invalid podcast index: {podcast_index} (total transcripts: {len(transcripts)})")
|
364 |
raise ValueError(f"Invalid podcast ID: {podcast_id}")
|
365 |
except ValueError as e:
|
366 |
+
logger.error(f"Error converting podcast ID: {str(e)}")
|
367 |
raise HTTPException(status_code=404, detail=str(e))
|
368 |
|
369 |
# Get podcast transcript
|
370 |
try:
|
371 |
podcast_transcript = transcripts[podcast_index].get("podcastScript")
|
372 |
if not podcast_transcript:
|
373 |
+
logger.error(f"No transcript content found for podcast {podcast_id}")
|
374 |
raise HTTPException(status_code=404, detail="No transcript content found for this podcast")
|
375 |
+
|
376 |
+
logger.info(f"Found transcript for podcast {podcast_id}")
|
377 |
+
logger.debug(f"Transcript content: {podcast_transcript[:200]}...") # Log first 200 chars
|
378 |
except (IndexError, KeyError) as e:
|
379 |
logger.error(f"Error accessing podcast transcript: {str(e)}")
|
380 |
raise HTTPException(status_code=404, detail="Transcript not found for this podcast")
|
|
|
388 |
|
389 |
# Use split_text for strings instead of split_documents
|
390 |
chunks = text_splitter.split_text(podcast_transcript)
|
391 |
+
logger.info(f"Split transcript into {len(chunks)} chunks")
|
392 |
|
393 |
if not chunks:
|
394 |
+
logger.error("No content chunks found in transcript")
|
395 |
raise HTTPException(status_code=404, detail="No content chunks found in transcript")
|
396 |
|
397 |
# Initialize embedding model
|
|
|
407 |
location=":memory:", # Use in-memory storage
|
408 |
collection_name=collection_name
|
409 |
)
|
410 |
+
logger.info(f"Created vector store for podcast {podcast_id}")
|
411 |
|
412 |
# Configure the retriever with search parameters
|
413 |
qdrant_retriever = vectorstore.as_retriever(
|
|
|
436 |
|
437 |
# Add logging for the retrieved documents and final prompt
|
438 |
def get_context_and_log(input_dict):
|
439 |
+
retrieved_docs = qdrant_retriever.get_relevant_documents(input_dict["question"])
|
440 |
+
context = format_docs(retrieved_docs)
|
441 |
logger.info("Retrieved context from podcast:")
|
442 |
logger.info("-" * 50)
|
443 |
logger.info(f"Context:\n{context}")
|
|
|
456 |
|
457 |
# Get response
|
458 |
response = chain.invoke({"question": request.message})
|
459 |
+
logger.info(f"Generated response: {response.content}")
|
460 |
|
461 |
return PodcastChatResponse(response=response.content)
|
462 |
|
463 |
+
except HTTPException:
|
464 |
+
raise
|
465 |
except Exception as e:
|
466 |
logger.error(f"Error in podcast chat: {str(e)}", exc_info=True)
|
467 |
raise HTTPException(status_code=500, detail=str(e))
|