Spaces:
Sleeping
Sleeping
import os | |
import json | |
import uuid | |
import logging | |
# Module-level logger, named after this module so handlers/levels can be
# configured by the application.
logger = logging.getLogger(__name__)

# Transcripts are stored in a "transcripts" folder that sits next to this
# module; every podcast transcript lives in a single JSON file inside it.
_MODULE_DIR = os.path.dirname(__file__)
TRANSCRIPTS_DIR = os.path.join(_MODULE_DIR, "transcripts")
TRANSCRIPTS_FILE = os.path.join(TRANSCRIPTS_DIR, "podcasts.json")

# Make sure the storage folder exists as soon as the module is imported.
os.makedirs(TRANSCRIPTS_DIR, exist_ok=True)
def _load_transcripts() -> list:
    """Return the list stored in TRANSCRIPTS_FILE, or ``[]`` if unusable.

    A missing file, undecodable/invalid JSON, or a top-level value that is
    not a list all yield an empty list.
    """
    if not os.path.exists(TRANSCRIPTS_FILE):
        return []
    try:
        with open(TRANSCRIPTS_FILE, "r", encoding="utf-8") as f:
            loaded = json.load(f)
    except (json.JSONDecodeError, UnicodeDecodeError):
        logger.warning("Error reading transcripts file, initializing empty list")
        return []
    return loaded if isinstance(loaded, list) else []


def _write_transcripts(transcripts: list) -> None:
    """Atomically replace TRANSCRIPTS_FILE with *transcripts* encoded as JSON.

    The data is serialized to a sibling temporary file first and then moved
    into place with ``os.replace``, so a failure while writing never leaves
    the real file truncated or half-written.
    """
    directory = os.path.dirname(TRANSCRIPTS_FILE)
    if directory:  # os.makedirs("") raises, so skip for bare filenames
        os.makedirs(directory, exist_ok=True)

    tmp_path = f"{TRANSCRIPTS_FILE}.tmp"
    try:
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(transcripts, f, indent=2)
        os.replace(tmp_path, TRANSCRIPTS_FILE)
    finally:
        # Only present if the dump or the replace failed; best-effort cleanup.
        if os.path.exists(tmp_path):
            try:
                os.remove(tmp_path)
            except OSError:
                pass


def save_transcript(podcast_script: str, user_query: str) -> None:
    """Save a podcast transcript to the JSON transcripts file.

    The topic is derived from ``user_query``: lower-cased, stripped of
    surrounding whitespace, with ``?``, ``!`` and ``.`` removed and
    underscores turned into spaces. If an entry with the same topic already
    exists it is replaced; otherwise the new entry is appended.

    Args:
        podcast_script: Full text of the podcast script to store.
        user_query: The user's original query, used to derive the topic.

    Raises:
        Exception: Whatever the last-resort write raises if both the normal
            save and the fallback save fail (e.g. ``OSError``).
    """
    # Normalize the query into the topic string used for matching.
    topic = user_query.lower().strip().replace(" ", "_")
    topic = topic.replace("?", "").replace("!", "").replace(".", "")

    transcript = {
        "id": str(uuid.uuid4()),
        "podcastScript": podcast_script,
        "topic": topic.replace("_", " "),  # stored with spaces for matching
    }

    try:
        transcripts = _load_transcripts()

        for index, existing in enumerate(transcripts):
            # Skip malformed (non-dict) entries instead of raising: an
            # exception here would trigger the fallback below, which
            # overwrites every other stored transcript.
            if isinstance(existing, dict) and existing.get("topic") == transcript["topic"]:
                transcripts[index] = transcript
                break
        else:
            # No entry for this topic yet.
            transcripts.append(transcript)

        _write_transcripts(transcripts)
        logger.info(f"Successfully saved transcript for topic: {transcript['topic']}")
    except Exception as e:
        # Last resort, kept from the original behavior: store at least this
        # transcript. The atomic write means a second failure leaves any
        # existing file untouched.
        logger.error(f"Error saving transcript: {str(e)}")
        _write_transcripts([transcript])
        logger.info("Saved single transcript after error")