Nagesh Muralidhar committed on
Commit
4972b4c
·
1 Parent(s): 6c71315

midterm-submission

Browse files
Files changed (2) hide show
  1. podcraft/src/pages/PodcastForm.tsx +19 -12
  2. server/main.py +21 -11
podcraft/src/pages/PodcastForm.tsx CHANGED
@@ -44,7 +44,10 @@ const PodcastForm: React.FC = () => {
44
  useEffect(() => {
45
  const fetchPodcastAndContext = async () => {
46
  try {
47
- if (!id) return;
 
 
 
48
 
49
  // Fetch podcast details
50
  const response = await fetch(`${API_URL}/api/audio-list`);
@@ -69,7 +72,7 @@ const PodcastForm: React.FC = () => {
69
  const category = categoryWithExt.replace('.mp3', '');
70
 
71
  return {
72
- id: index + 1,
73
  title: descriptionPart.replace(/_/g, ' ').replace(/^\w/, c => c.toUpperCase()),
74
  description: `A debate exploring ${queryPart.replace(/_/g, ' ')}`,
75
  audio_file: `${API_URL}${file.path}`,
@@ -79,15 +82,18 @@ const PodcastForm: React.FC = () => {
79
  });
80
 
81
  const selectedPodcast = podcastList.find(p => p.id === parseInt(id));
82
- if (selectedPodcast) {
83
- setPodcast(selectedPodcast);
84
-
85
- // Fetch podcast context
86
- const contextResponse = await fetch(`${API_URL}/api/podcast/${id}/context`);
87
- if (contextResponse.ok) {
88
- const contextData: PodcastContext = await contextResponse.json();
89
- setPodcastContext(contextData);
90
- }
 
 
 
91
  }
92
  } catch (err) {
93
  console.error('Error fetching podcast:', err);
@@ -124,7 +130,8 @@ const PodcastForm: React.FC = () => {
124
  });
125
 
126
  if (!response.ok) {
127
- throw new Error(`Server error: ${response.status}`);
 
128
  }
129
 
130
  const data = await response.json();
 
44
  useEffect(() => {
45
  const fetchPodcastAndContext = async () => {
46
  try {
47
+ if (!id) {
48
+ setError("No podcast ID provided");
49
+ return;
50
+ }
51
 
52
  // Fetch podcast details
53
  const response = await fetch(`${API_URL}/api/audio-list`);
 
72
  const category = categoryWithExt.replace('.mp3', '');
73
 
74
  return {
75
+ id: index + 1, // Use 1-based index for consistency
76
  title: descriptionPart.replace(/_/g, ' ').replace(/^\w/, c => c.toUpperCase()),
77
  description: `A debate exploring ${queryPart.replace(/_/g, ' ')}`,
78
  audio_file: `${API_URL}${file.path}`,
 
82
  });
83
 
84
  const selectedPodcast = podcastList.find(p => p.id === parseInt(id));
85
+ if (!selectedPodcast) {
86
+ throw new Error(`Podcast with ID ${id} not found`);
87
+ }
88
+ setPodcast(selectedPodcast);
89
+
90
+ // Fetch podcast context
91
+ const contextResponse = await fetch(`${API_URL}/api/podcast/${id}/context`);
92
+ if (contextResponse.ok) {
93
+ const contextData: PodcastContext = await contextResponse.json();
94
+ setPodcastContext(contextData);
95
+ } else {
96
+ console.warn(`Could not fetch context for podcast ${id}`);
97
  }
98
  } catch (err) {
99
  console.error('Error fetching podcast:', err);
 
130
  });
131
 
132
  if (!response.ok) {
133
+ const errorData = await response.text();
134
+ throw new Error(`Server error: ${response.status} ${errorData}`);
135
  }
136
 
137
  const data = await response.json();
server/main.py CHANGED
@@ -333,7 +333,7 @@ async def get_podcast_context(podcast_id: str):
333
  logger.error(f"Error in get_podcast_context: {str(e)}", exc_info=True)
334
  raise HTTPException(status_code=500, detail=str(e))
335
 
336
- @api_router.post("/podcast-chat/{podcast_id}", response_model=PodcastChatResponse)
337
  async def podcast_chat(podcast_id: str, request: PodcastChatRequest):
338
  """Handle chat messages for a specific podcast."""
339
  try:
@@ -342,34 +342,39 @@ async def podcast_chat(podcast_id: str, request: PodcastChatRequest):
342
  # Path to transcripts file
343
  transcripts_file = os.path.join(os.path.dirname(__file__), "transcripts", "podcasts.json")
344
 
345
- # Check if transcripts file exists and initialize if needed
346
  if not os.path.exists(transcripts_file):
347
- logger.warning("Transcripts file not found, initializing empty file")
348
- with open(transcripts_file, 'w') as f:
349
- json.dump([], f)
350
- raise HTTPException(status_code=404, detail="No transcript available for this podcast yet")
351
 
352
  # Read transcripts
353
  try:
354
  with open(transcripts_file, 'r') as f:
355
  transcripts = json.load(f)
 
356
  except json.JSONDecodeError as e:
357
- logger.error(f"Error reading transcripts file: {str(e)}")
358
- raise HTTPException(status_code=500, detail="Error reading podcast transcript")
359
 
360
  # Convert podcast_id to zero-based index
361
  try:
362
  podcast_index = int(podcast_id) - 1
363
  if podcast_index < 0 or podcast_index >= len(transcripts):
 
364
  raise ValueError(f"Invalid podcast ID: {podcast_id}")
365
  except ValueError as e:
 
366
  raise HTTPException(status_code=404, detail=str(e))
367
 
368
  # Get podcast transcript
369
  try:
370
  podcast_transcript = transcripts[podcast_index].get("podcastScript")
371
  if not podcast_transcript:
 
372
  raise HTTPException(status_code=404, detail="No transcript content found for this podcast")
 
 
 
373
  except (IndexError, KeyError) as e:
374
  logger.error(f"Error accessing podcast transcript: {str(e)}")
375
  raise HTTPException(status_code=404, detail="Transcript not found for this podcast")
@@ -383,8 +388,10 @@ async def podcast_chat(podcast_id: str, request: PodcastChatRequest):
383
 
384
  # Use split_text for strings instead of split_documents
385
  chunks = text_splitter.split_text(podcast_transcript)
 
386
 
387
  if not chunks:
 
388
  raise HTTPException(status_code=404, detail="No content chunks found in transcript")
389
 
390
  # Initialize embedding model
@@ -400,6 +407,7 @@ async def podcast_chat(podcast_id: str, request: PodcastChatRequest):
400
  location=":memory:", # Use in-memory storage
401
  collection_name=collection_name
402
  )
 
403
 
404
  # Configure the retriever with search parameters
405
  qdrant_retriever = vectorstore.as_retriever(
@@ -428,7 +436,8 @@ async def podcast_chat(podcast_id: str, request: PodcastChatRequest):
428
 
429
  # Add logging for the retrieved documents and final prompt
430
  def get_context_and_log(input_dict):
431
- context = format_docs(qdrant_retriever.get_relevant_documents(input_dict["question"]))
 
432
  logger.info("Retrieved context from podcast:")
433
  logger.info("-" * 50)
434
  logger.info(f"Context:\n{context}")
@@ -447,11 +456,12 @@ async def podcast_chat(podcast_id: str, request: PodcastChatRequest):
447
 
448
  # Get response
449
  response = chain.invoke({"question": request.message})
 
450
 
451
  return PodcastChatResponse(response=response.content)
452
 
453
- except HTTPException as he:
454
- raise he
455
  except Exception as e:
456
  logger.error(f"Error in podcast chat: {str(e)}", exc_info=True)
457
  raise HTTPException(status_code=500, detail=str(e))
 
333
  logger.error(f"Error in get_podcast_context: {str(e)}", exc_info=True)
334
  raise HTTPException(status_code=500, detail=str(e))
335
 
336
+ @app.post("/podcast-chat/{podcast_id}", response_model=PodcastChatResponse)
337
  async def podcast_chat(podcast_id: str, request: PodcastChatRequest):
338
  """Handle chat messages for a specific podcast."""
339
  try:
 
342
  # Path to transcripts file
343
  transcripts_file = os.path.join(os.path.dirname(__file__), "transcripts", "podcasts.json")
344
 
345
+ # Check if transcripts file exists
346
  if not os.path.exists(transcripts_file):
347
+ logger.error("Transcripts file not found")
348
+ raise HTTPException(status_code=404, detail="Transcripts file not found")
 
 
349
 
350
  # Read transcripts
351
  try:
352
  with open(transcripts_file, 'r') as f:
353
  transcripts = json.load(f)
354
+ logger.info(f"Loaded {len(transcripts)} transcripts")
355
  except json.JSONDecodeError as e:
356
+ logger.error(f"Error decoding transcripts file: {str(e)}")
357
+ raise HTTPException(status_code=500, detail="Error reading transcripts file")
358
 
359
  # Convert podcast_id to zero-based index
360
  try:
361
  podcast_index = int(podcast_id) - 1
362
  if podcast_index < 0 or podcast_index >= len(transcripts):
363
+ logger.error(f"Invalid podcast index: {podcast_index} (total transcripts: {len(transcripts)})")
364
  raise ValueError(f"Invalid podcast ID: {podcast_id}")
365
  except ValueError as e:
366
+ logger.error(f"Error converting podcast ID: {str(e)}")
367
  raise HTTPException(status_code=404, detail=str(e))
368
 
369
  # Get podcast transcript
370
  try:
371
  podcast_transcript = transcripts[podcast_index].get("podcastScript")
372
  if not podcast_transcript:
373
+ logger.error(f"No transcript content found for podcast {podcast_id}")
374
  raise HTTPException(status_code=404, detail="No transcript content found for this podcast")
375
+
376
+ logger.info(f"Found transcript for podcast {podcast_id}")
377
+ logger.debug(f"Transcript content: {podcast_transcript[:200]}...") # Log first 200 chars
378
  except (IndexError, KeyError) as e:
379
  logger.error(f"Error accessing podcast transcript: {str(e)}")
380
  raise HTTPException(status_code=404, detail="Transcript not found for this podcast")
 
388
 
389
  # Use split_text for strings instead of split_documents
390
  chunks = text_splitter.split_text(podcast_transcript)
391
+ logger.info(f"Split transcript into {len(chunks)} chunks")
392
 
393
  if not chunks:
394
+ logger.error("No content chunks found in transcript")
395
  raise HTTPException(status_code=404, detail="No content chunks found in transcript")
396
 
397
  # Initialize embedding model
 
407
  location=":memory:", # Use in-memory storage
408
  collection_name=collection_name
409
  )
410
+ logger.info(f"Created vector store for podcast {podcast_id}")
411
 
412
  # Configure the retriever with search parameters
413
  qdrant_retriever = vectorstore.as_retriever(
 
436
 
437
  # Add logging for the retrieved documents and final prompt
438
  def get_context_and_log(input_dict):
439
+ retrieved_docs = qdrant_retriever.get_relevant_documents(input_dict["question"])
440
+ context = format_docs(retrieved_docs)
441
  logger.info("Retrieved context from podcast:")
442
  logger.info("-" * 50)
443
  logger.info(f"Context:\n{context}")
 
456
 
457
  # Get response
458
  response = chain.invoke({"question": request.message})
459
+ logger.info(f"Generated response: {response.content}")
460
 
461
  return PodcastChatResponse(response=response.content)
462
 
463
+ except HTTPException:
464
+ raise
465
  except Exception as e:
466
  logger.error(f"Error in podcast chat: {str(e)}", exc_info=True)
467
  raise HTTPException(status_code=500, detail=str(e))