Spaces:
Sleeping
Sleeping
Nagesh Muralidhar
committed on
Commit
·
bd04115
1
Parent(s):
1aeaa53
midterm-submission
Browse files
- server/agents.py +8 -0
- server/main.py +33 -5
- server/workflow.py +12 -2
server/agents.py
CHANGED
@@ -15,6 +15,7 @@ import numpy as np
|
|
15 |
from langchain.schema import SystemMessage, HumanMessage, AIMessage
|
16 |
from langchain.output_parsers import PydanticOutputParser
|
17 |
from pydantic import BaseModel, Field
|
|
|
18 |
|
19 |
# Configure logging
|
20 |
logging.basicConfig(
|
@@ -382,6 +383,13 @@ class PodcastProducerAgent:
|
|
382 |
|
383 |
podcast_logger.info(f"Successfully saved audio file: {filepath}")
|
384 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
385 |
return {
|
386 |
"type": "podcast",
|
387 |
"content": script_response.content,
|
|
|
15 |
from langchain.schema import SystemMessage, HumanMessage, AIMessage
|
16 |
from langchain.output_parsers import PydanticOutputParser
|
17 |
from pydantic import BaseModel, Field
|
18 |
+
from workflow import save_transcript
|
19 |
|
20 |
# Configure logging
|
21 |
logging.basicConfig(
|
|
|
383 |
|
384 |
podcast_logger.info(f"Successfully saved audio file: {filepath}")
|
385 |
|
386 |
+
# Save the transcript
|
387 |
+
try:
|
388 |
+
save_transcript(script_response.content, user_query)
|
389 |
+
podcast_logger.info("Successfully saved transcript")
|
390 |
+
except Exception as e:
|
391 |
+
podcast_logger.error(f"Error saving transcript: {str(e)}")
|
392 |
+
|
393 |
return {
|
394 |
"type": "podcast",
|
395 |
"content": script_response.content,
|
server/main.py
CHANGED
@@ -82,6 +82,16 @@ os.makedirs(audio_dir, exist_ok=True)
|
|
82 |
context_dir = os.path.join(os.path.dirname(__file__), "context_storage")
|
83 |
os.makedirs(context_dir, exist_ok=True)
|
84 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
85 |
# API Routes
|
86 |
@api_router.post("/chat")
|
87 |
async def chat(message: ChatMessage):
|
@@ -332,13 +342,20 @@ async def podcast_chat(podcast_id: str, request: PodcastChatRequest):
|
|
332 |
# Path to transcripts file
|
333 |
transcripts_file = os.path.join(os.path.dirname(__file__), "transcripts", "podcasts.json")
|
334 |
|
335 |
-
# Check if transcripts file exists
|
336 |
if not os.path.exists(transcripts_file):
|
337 |
-
|
|
|
|
|
|
|
338 |
|
339 |
# Read transcripts
|
340 |
-
|
341 |
-
|
|
|
|
|
|
|
|
|
342 |
|
343 |
# Convert podcast_id to zero-based index
|
344 |
try:
|
@@ -349,7 +366,13 @@ async def podcast_chat(podcast_id: str, request: PodcastChatRequest):
|
|
349 |
raise HTTPException(status_code=404, detail=str(e))
|
350 |
|
351 |
# Get podcast transcript
|
352 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
353 |
|
354 |
# Split text into chunks
|
355 |
text_splitter = RecursiveCharacterTextSplitter(
|
@@ -361,6 +384,9 @@ async def podcast_chat(podcast_id: str, request: PodcastChatRequest):
|
|
361 |
# Use split_text for strings instead of split_documents
|
362 |
chunks = text_splitter.split_text(podcast_transcript)
|
363 |
|
|
|
|
|
|
|
364 |
# Initialize embedding model
|
365 |
embedding_model = OpenAIEmbeddings(model="text-embedding-3-small")
|
366 |
|
@@ -424,6 +450,8 @@ async def podcast_chat(podcast_id: str, request: PodcastChatRequest):
|
|
424 |
|
425 |
return PodcastChatResponse(response=response.content)
|
426 |
|
|
|
|
|
427 |
except Exception as e:
|
428 |
logger.error(f"Error in podcast chat: {str(e)}", exc_info=True)
|
429 |
raise HTTPException(status_code=500, detail=str(e))
|
|
|
82 |
context_dir = os.path.join(os.path.dirname(__file__), "context_storage")
|
83 |
os.makedirs(context_dir, exist_ok=True)
|
84 |
|
85 |
+
# Add transcripts directory
|
86 |
+
transcripts_dir = os.path.join(os.path.dirname(__file__), "transcripts")
|
87 |
+
os.makedirs(transcripts_dir, exist_ok=True)
|
88 |
+
|
89 |
+
# Initialize empty transcripts file if it doesn't exist
|
90 |
+
transcripts_file = os.path.join(transcripts_dir, "podcasts.json")
|
91 |
+
if not os.path.exists(transcripts_file):
|
92 |
+
with open(transcripts_file, 'w') as f:
|
93 |
+
json.dump([], f)
|
94 |
+
|
95 |
# API Routes
|
96 |
@api_router.post("/chat")
|
97 |
async def chat(message: ChatMessage):
|
|
|
342 |
# Path to transcripts file
|
343 |
transcripts_file = os.path.join(os.path.dirname(__file__), "transcripts", "podcasts.json")
|
344 |
|
345 |
+
# Check if transcripts file exists and initialize if needed
|
346 |
if not os.path.exists(transcripts_file):
|
347 |
+
logger.warning("Transcripts file not found, initializing empty file")
|
348 |
+
with open(transcripts_file, 'w') as f:
|
349 |
+
json.dump([], f)
|
350 |
+
raise HTTPException(status_code=404, detail="No transcript available for this podcast yet")
|
351 |
|
352 |
# Read transcripts
|
353 |
+
try:
|
354 |
+
with open(transcripts_file, 'r') as f:
|
355 |
+
transcripts = json.load(f)
|
356 |
+
except json.JSONDecodeError as e:
|
357 |
+
logger.error(f"Error reading transcripts file: {str(e)}")
|
358 |
+
raise HTTPException(status_code=500, detail="Error reading podcast transcript")
|
359 |
|
360 |
# Convert podcast_id to zero-based index
|
361 |
try:
|
|
|
366 |
raise HTTPException(status_code=404, detail=str(e))
|
367 |
|
368 |
# Get podcast transcript
|
369 |
+
try:
|
370 |
+
podcast_transcript = transcripts[podcast_index].get("podcastScript")
|
371 |
+
if not podcast_transcript:
|
372 |
+
raise HTTPException(status_code=404, detail="No transcript content found for this podcast")
|
373 |
+
except (IndexError, KeyError) as e:
|
374 |
+
logger.error(f"Error accessing podcast transcript: {str(e)}")
|
375 |
+
raise HTTPException(status_code=404, detail="Transcript not found for this podcast")
|
376 |
|
377 |
# Split text into chunks
|
378 |
text_splitter = RecursiveCharacterTextSplitter(
|
|
|
384 |
# Use split_text for strings instead of split_documents
|
385 |
chunks = text_splitter.split_text(podcast_transcript)
|
386 |
|
387 |
+
if not chunks:
|
388 |
+
raise HTTPException(status_code=404, detail="No content chunks found in transcript")
|
389 |
+
|
390 |
# Initialize embedding model
|
391 |
embedding_model = OpenAIEmbeddings(model="text-embedding-3-small")
|
392 |
|
|
|
450 |
|
451 |
return PodcastChatResponse(response=response.content)
|
452 |
|
453 |
+
except HTTPException as he:
|
454 |
+
raise he
|
455 |
except Exception as e:
|
456 |
logger.error(f"Error in podcast chat: {str(e)}", exc_info=True)
|
457 |
raise HTTPException(status_code=500, detail=str(e))
|
server/workflow.py
CHANGED
@@ -26,8 +26,13 @@ def save_transcript(podcast_script: str, user_query: str) -> None:
|
|
26 |
try:
|
27 |
# Load existing transcripts
|
28 |
if os.path.exists(TRANSCRIPTS_FILE):
|
29 |
-
|
30 |
-
|
|
|
|
|
|
|
|
|
|
|
31 |
else:
|
32 |
transcripts = []
|
33 |
|
@@ -40,6 +45,11 @@ def save_transcript(podcast_script: str, user_query: str) -> None:
|
|
40 |
|
41 |
except Exception as e:
|
42 |
print(f"Error saving transcript: {str(e)}")
|
|
|
|
|
|
|
|
|
|
|
43 |
|
44 |
class AgentState(TypedDict):
|
45 |
messages: List[Dict[str, Any]]
|
|
|
26 |
try:
|
27 |
# Load existing transcripts
|
28 |
if os.path.exists(TRANSCRIPTS_FILE):
|
29 |
+
try:
|
30 |
+
with open(TRANSCRIPTS_FILE, 'r') as f:
|
31 |
+
transcripts = json.load(f)
|
32 |
+
if not isinstance(transcripts, list):
|
33 |
+
transcripts = []
|
34 |
+
except json.JSONDecodeError:
|
35 |
+
transcripts = []
|
36 |
else:
|
37 |
transcripts = []
|
38 |
|
|
|
45 |
|
46 |
except Exception as e:
|
47 |
print(f"Error saving transcript: {str(e)}")
|
48 |
+
# Create directory if it doesn't exist
|
49 |
+
os.makedirs(os.path.dirname(TRANSCRIPTS_FILE), exist_ok=True)
|
50 |
+
# Try to save just this transcript
|
51 |
+
with open(TRANSCRIPTS_FILE, 'w') as f:
|
52 |
+
json.dump([transcript], f, indent=2)
|
53 |
|
54 |
class AgentState(TypedDict):
|
55 |
messages: List[Dict[str, Any]]
|