Nagesh Muralidhar committed on
Commit
bd04115
·
1 Parent(s): 1aeaa53

midterm-submission

Browse files
Files changed (3) hide show
  1. server/agents.py +8 -0
  2. server/main.py +33 -5
  3. server/workflow.py +12 -2
server/agents.py CHANGED
@@ -15,6 +15,7 @@ import numpy as np
15
  from langchain.schema import SystemMessage, HumanMessage, AIMessage
16
  from langchain.output_parsers import PydanticOutputParser
17
  from pydantic import BaseModel, Field
 
18
 
19
  # Configure logging
20
  logging.basicConfig(
@@ -382,6 +383,13 @@ class PodcastProducerAgent:
382
 
383
  podcast_logger.info(f"Successfully saved audio file: {filepath}")
384
 
 
 
 
 
 
 
 
385
  return {
386
  "type": "podcast",
387
  "content": script_response.content,
 
15
  from langchain.schema import SystemMessage, HumanMessage, AIMessage
16
  from langchain.output_parsers import PydanticOutputParser
17
  from pydantic import BaseModel, Field
18
+ from workflow import save_transcript
19
 
20
  # Configure logging
21
  logging.basicConfig(
 
383
 
384
  podcast_logger.info(f"Successfully saved audio file: {filepath}")
385
 
386
+ # Save the transcript
387
+ try:
388
+ save_transcript(script_response.content, user_query)
389
+ podcast_logger.info("Successfully saved transcript")
390
+ except Exception as e:
391
+ podcast_logger.error(f"Error saving transcript: {str(e)}")
392
+
393
  return {
394
  "type": "podcast",
395
  "content": script_response.content,
server/main.py CHANGED
@@ -82,6 +82,16 @@ os.makedirs(audio_dir, exist_ok=True)
82
  context_dir = os.path.join(os.path.dirname(__file__), "context_storage")
83
  os.makedirs(context_dir, exist_ok=True)
84
 
 
 
 
 
 
 
 
 
 
 
85
  # API Routes
86
  @api_router.post("/chat")
87
  async def chat(message: ChatMessage):
@@ -332,13 +342,20 @@ async def podcast_chat(podcast_id: str, request: PodcastChatRequest):
332
  # Path to transcripts file
333
  transcripts_file = os.path.join(os.path.dirname(__file__), "transcripts", "podcasts.json")
334
 
335
- # Check if transcripts file exists
336
  if not os.path.exists(transcripts_file):
337
- raise HTTPException(status_code=404, detail="Transcripts file not found")
 
 
 
338
 
339
  # Read transcripts
340
- with open(transcripts_file, 'r') as f:
341
- transcripts = json.load(f)
 
 
 
 
342
 
343
  # Convert podcast_id to zero-based index
344
  try:
@@ -349,7 +366,13 @@ async def podcast_chat(podcast_id: str, request: PodcastChatRequest):
349
  raise HTTPException(status_code=404, detail=str(e))
350
 
351
  # Get podcast transcript
352
- podcast_transcript = transcripts[podcast_index]["podcastScript"]
 
 
 
 
 
 
353
 
354
  # Split text into chunks
355
  text_splitter = RecursiveCharacterTextSplitter(
@@ -361,6 +384,9 @@ async def podcast_chat(podcast_id: str, request: PodcastChatRequest):
361
  # Use split_text for strings instead of split_documents
362
  chunks = text_splitter.split_text(podcast_transcript)
363
 
 
 
 
364
  # Initialize embedding model
365
  embedding_model = OpenAIEmbeddings(model="text-embedding-3-small")
366
 
@@ -424,6 +450,8 @@ async def podcast_chat(podcast_id: str, request: PodcastChatRequest):
424
 
425
  return PodcastChatResponse(response=response.content)
426
 
 
 
427
  except Exception as e:
428
  logger.error(f"Error in podcast chat: {str(e)}", exc_info=True)
429
  raise HTTPException(status_code=500, detail=str(e))
 
82
  context_dir = os.path.join(os.path.dirname(__file__), "context_storage")
83
  os.makedirs(context_dir, exist_ok=True)
84
 
85
+ # Add transcripts directory
86
+ transcripts_dir = os.path.join(os.path.dirname(__file__), "transcripts")
87
+ os.makedirs(transcripts_dir, exist_ok=True)
88
+
89
+ # Initialize empty transcripts file if it doesn't exist
90
+ transcripts_file = os.path.join(transcripts_dir, "podcasts.json")
91
+ if not os.path.exists(transcripts_file):
92
+ with open(transcripts_file, 'w') as f:
93
+ json.dump([], f)
94
+
95
  # API Routes
96
  @api_router.post("/chat")
97
  async def chat(message: ChatMessage):
 
342
  # Path to transcripts file
343
  transcripts_file = os.path.join(os.path.dirname(__file__), "transcripts", "podcasts.json")
344
 
345
+ # Check if transcripts file exists and initialize if needed
346
  if not os.path.exists(transcripts_file):
347
+ logger.warning("Transcripts file not found, initializing empty file")
348
+ with open(transcripts_file, 'w') as f:
349
+ json.dump([], f)
350
+ raise HTTPException(status_code=404, detail="No transcript available for this podcast yet")
351
 
352
  # Read transcripts
353
+ try:
354
+ with open(transcripts_file, 'r') as f:
355
+ transcripts = json.load(f)
356
+ except json.JSONDecodeError as e:
357
+ logger.error(f"Error reading transcripts file: {str(e)}")
358
+ raise HTTPException(status_code=500, detail="Error reading podcast transcript")
359
 
360
  # Convert podcast_id to zero-based index
361
  try:
 
366
  raise HTTPException(status_code=404, detail=str(e))
367
 
368
  # Get podcast transcript
369
+ try:
370
+ podcast_transcript = transcripts[podcast_index].get("podcastScript")
371
+ if not podcast_transcript:
372
+ raise HTTPException(status_code=404, detail="No transcript content found for this podcast")
373
+ except (IndexError, KeyError) as e:
374
+ logger.error(f"Error accessing podcast transcript: {str(e)}")
375
+ raise HTTPException(status_code=404, detail="Transcript not found for this podcast")
376
 
377
  # Split text into chunks
378
  text_splitter = RecursiveCharacterTextSplitter(
 
384
  # Use split_text for strings instead of split_documents
385
  chunks = text_splitter.split_text(podcast_transcript)
386
 
387
+ if not chunks:
388
+ raise HTTPException(status_code=404, detail="No content chunks found in transcript")
389
+
390
  # Initialize embedding model
391
  embedding_model = OpenAIEmbeddings(model="text-embedding-3-small")
392
 
 
450
 
451
  return PodcastChatResponse(response=response.content)
452
 
453
+ except HTTPException as he:
454
+ raise he
455
  except Exception as e:
456
  logger.error(f"Error in podcast chat: {str(e)}", exc_info=True)
457
  raise HTTPException(status_code=500, detail=str(e))
server/workflow.py CHANGED
@@ -26,8 +26,13 @@ def save_transcript(podcast_script: str, user_query: str) -> None:
26
  try:
27
  # Load existing transcripts
28
  if os.path.exists(TRANSCRIPTS_FILE):
29
- with open(TRANSCRIPTS_FILE, 'r') as f:
30
- transcripts = json.load(f)
 
 
 
 
 
31
  else:
32
  transcripts = []
33
 
@@ -40,6 +45,11 @@ def save_transcript(podcast_script: str, user_query: str) -> None:
40
 
41
  except Exception as e:
42
  print(f"Error saving transcript: {str(e)}")
 
 
 
 
 
43
 
44
  class AgentState(TypedDict):
45
  messages: List[Dict[str, Any]]
 
26
  try:
27
  # Load existing transcripts
28
  if os.path.exists(TRANSCRIPTS_FILE):
29
+ try:
30
+ with open(TRANSCRIPTS_FILE, 'r') as f:
31
+ transcripts = json.load(f)
32
+ if not isinstance(transcripts, list):
33
+ transcripts = []
34
+ except json.JSONDecodeError:
35
+ transcripts = []
36
  else:
37
  transcripts = []
38
 
 
45
 
46
  except Exception as e:
47
  print(f"Error saving transcript: {str(e)}")
48
+ # Create directory if it doesn't exist
49
+ os.makedirs(os.path.dirname(TRANSCRIPTS_FILE), exist_ok=True)
50
+ # Try to save just this transcript
51
+ with open(TRANSCRIPTS_FILE, 'w') as f:
52
+ json.dump([transcript], f, indent=2)
53
 
54
  class AgentState(TypedDict):
55
  messages: List[Dict[str, Any]]