Spaces:

dataera2013
/

midterm

Sleeping

App Files Files Community

Nagesh Muralidhar committed on Feb 24

Commit

bd04115

1 Parent(s): 1aeaa53

midterm-submission

Browse files

Files changed (3) hide show

server/agents.py +8 -0
server/main.py +33 -5
server/workflow.py +12 -2

server/agents.py CHANGED Viewed

@@ -15,6 +15,7 @@ import numpy as np
 from langchain.schema import SystemMessage, HumanMessage, AIMessage
 from langchain.output_parsers import PydanticOutputParser
 from pydantic import BaseModel, Field
 # Configure logging
 logging.basicConfig(
@@ -382,6 +383,13 @@ class PodcastProducerAgent:
                 podcast_logger.info(f"Successfully saved audio file: {filepath}")
                 return {
                     "type": "podcast",
                     "content": script_response.content,

 from langchain.schema import SystemMessage, HumanMessage, AIMessage
 from langchain.output_parsers import PydanticOutputParser
 from pydantic import BaseModel, Field
+from workflow import save_transcript
 # Configure logging
 logging.basicConfig(
                 podcast_logger.info(f"Successfully saved audio file: {filepath}")
+                # Save the transcript
+                try:
+                    save_transcript(script_response.content, user_query)
+                    podcast_logger.info("Successfully saved transcript")
+                except Exception as e:
+                    podcast_logger.error(f"Error saving transcript: {str(e)}")
                 return {
                     "type": "podcast",
                     "content": script_response.content,

server/main.py CHANGED Viewed

@@ -82,6 +82,16 @@ os.makedirs(audio_dir, exist_ok=True)
 context_dir = os.path.join(os.path.dirname(__file__), "context_storage")
 os.makedirs(context_dir, exist_ok=True)
 # API Routes
 @api_router.post("/chat")
 async def chat(message: ChatMessage):
@@ -332,13 +342,20 @@ async def podcast_chat(podcast_id: str, request: PodcastChatRequest):
         # Path to transcripts file
         transcripts_file = os.path.join(os.path.dirname(__file__), "transcripts", "podcasts.json")
-        # Check if transcripts file exists
         if not os.path.exists(transcripts_file):
-            raise HTTPException(status_code=404, detail="Transcripts file not found")
         # Read transcripts
-        with open(transcripts_file, 'r') as f:
-            transcripts = json.load(f)
         # Convert podcast_id to zero-based index
         try:
@@ -349,7 +366,13 @@ async def podcast_chat(podcast_id: str, request: PodcastChatRequest):
             raise HTTPException(status_code=404, detail=str(e))
         # Get podcast transcript
-        podcast_transcript = transcripts[podcast_index]["podcastScript"]
         # Split text into chunks
         text_splitter = RecursiveCharacterTextSplitter(
@@ -361,6 +384,9 @@ async def podcast_chat(podcast_id: str, request: PodcastChatRequest):
         # Use split_text for strings instead of split_documents
         chunks = text_splitter.split_text(podcast_transcript)
         # Initialize embedding model
         embedding_model = OpenAIEmbeddings(model="text-embedding-3-small")
@@ -424,6 +450,8 @@ async def podcast_chat(podcast_id: str, request: PodcastChatRequest):
         return PodcastChatResponse(response=response.content)
     except Exception as e:
         logger.error(f"Error in podcast chat: {str(e)}", exc_info=True)
         raise HTTPException(status_code=500, detail=str(e))

 context_dir = os.path.join(os.path.dirname(__file__), "context_storage")
 os.makedirs(context_dir, exist_ok=True)
+# Add transcripts directory
+transcripts_dir = os.path.join(os.path.dirname(__file__), "transcripts")
+os.makedirs(transcripts_dir, exist_ok=True)
+# Initialize empty transcripts file if it doesn't exist
+transcripts_file = os.path.join(transcripts_dir, "podcasts.json")
+if not os.path.exists(transcripts_file):
+    with open(transcripts_file, 'w') as f:
+        json.dump([], f)
 # API Routes
 @api_router.post("/chat")
 async def chat(message: ChatMessage):
         # Path to transcripts file
         transcripts_file = os.path.join(os.path.dirname(__file__), "transcripts", "podcasts.json")
+        # Check if transcripts file exists and initialize if needed
         if not os.path.exists(transcripts_file):
+            logger.warning("Transcripts file not found, initializing empty file")
+            with open(transcripts_file, 'w') as f:
+                json.dump([], f)
+            raise HTTPException(status_code=404, detail="No transcript available for this podcast yet")
         # Read transcripts
+        try:
+            with open(transcripts_file, 'r') as f:
+                transcripts = json.load(f)
+        except json.JSONDecodeError as e:
+            logger.error(f"Error reading transcripts file: {str(e)}")
+            raise HTTPException(status_code=500, detail="Error reading podcast transcript")
         # Convert podcast_id to zero-based index
         try:
             raise HTTPException(status_code=404, detail=str(e))
         # Get podcast transcript
+        try:
+            podcast_transcript = transcripts[podcast_index].get("podcastScript")
+            if not podcast_transcript:
+                raise HTTPException(status_code=404, detail="No transcript content found for this podcast")
+        except (IndexError, KeyError) as e:
+            logger.error(f"Error accessing podcast transcript: {str(e)}")
+            raise HTTPException(status_code=404, detail="Transcript not found for this podcast")
         # Split text into chunks
         text_splitter = RecursiveCharacterTextSplitter(
         # Use split_text for strings instead of split_documents
         chunks = text_splitter.split_text(podcast_transcript)
+        if not chunks:
+            raise HTTPException(status_code=404, detail="No content chunks found in transcript")
         # Initialize embedding model
         embedding_model = OpenAIEmbeddings(model="text-embedding-3-small")
         return PodcastChatResponse(response=response.content)
+    except HTTPException as he:
+        raise he
     except Exception as e:
         logger.error(f"Error in podcast chat: {str(e)}", exc_info=True)
         raise HTTPException(status_code=500, detail=str(e))

server/workflow.py CHANGED Viewed

@@ -26,8 +26,13 @@ def save_transcript(podcast_script: str, user_query: str) -> None:
     try:
         # Load existing transcripts
         if os.path.exists(TRANSCRIPTS_FILE):
-            with open(TRANSCRIPTS_FILE, 'r') as f:
-                transcripts = json.load(f)
         else:
             transcripts = []
@@ -40,6 +45,11 @@ def save_transcript(podcast_script: str, user_query: str) -> None:
     except Exception as e:
         print(f"Error saving transcript: {str(e)}")
 class AgentState(TypedDict):
     messages: List[Dict[str, Any]]

     try:
         # Load existing transcripts
         if os.path.exists(TRANSCRIPTS_FILE):
+            try:
+                with open(TRANSCRIPTS_FILE, 'r') as f:
+                    transcripts = json.load(f)
+                    if not isinstance(transcripts, list):
+                        transcripts = []
+            except json.JSONDecodeError:
+                transcripts = []
         else:
             transcripts = []
     except Exception as e:
         print(f"Error saving transcript: {str(e)}")
+        # Create directory if it doesn't exist
+        os.makedirs(os.path.dirname(TRANSCRIPTS_FILE), exist_ok=True)
+        # Try to save just this transcript
+        with open(TRANSCRIPTS_FILE, 'w') as f:
+            json.dump([transcript], f, indent=2)
 class AgentState(TypedDict):
     messages: List[Dict[str, Any]]