File size: 4,842 Bytes
6a9fed6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
import logging
import os
import uuid
from datetime import datetime
from pathlib import Path
from typing import Optional

from fastapi import FastAPI, HTTPException, BackgroundTasks, Request
from fastapi.responses import StreamingResponse
from pydantic import BaseModel

from utils import (
    sanitize_url,
    crawl_documentation,
    get_voice_prompt_style,
    voice_map,
)
from ai_agents import Runner, setup_agents
from generate_audio import generate_audio

# Directory that holds the generated "<audio_key>.mp3" files; created at import
# time so the request handlers can assume it exists.
AUDIO_DIR = "audio_outputs"
Path(AUDIO_DIR).mkdir(parents=True, exist_ok=True)

app = FastAPI()

# Log to a file that is truncated on every start (filemode="w").
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s | %(levelname)s | %(message)s",
    filename="voice_agent.log",
    filemode="w",
)
logger = logging.getLogger(__name__)

class QueryRequest(BaseModel):
    """Request body accepted by ``POST /process``.

    Only ``query`` is required. The remaining fields are optional and may be
    omitted or sent as JSON ``null``; they are declared ``Optional`` so an
    explicit ``null`` passes validation instead of being rejected.
    """

    # The user's question.
    query: str
    # Documentation URL to crawl for context; takes precedence over file_text.
    url: Optional[str] = None
    # Key into ``voice_map``; when missing or unknown no audio is produced.
    voice: Optional[str] = None
    # Raw text of an uploaded file, used for key points and as context.
    file_text: Optional[str] = None

from typing import Optional

class QueryResponse(BaseModel):
    """Response body returned by ``POST /process``."""

    # Text answer produced for the query.
    answer: str
    # Key to pass to ``GET /get-audio/{key}``; None when no audio is available.
    audio_key: Optional[str] = None
    # Source references for the answer (the handler in this file always sends []).
    sources: list = []
    # Key points extracted from the uploaded file text, if any was supplied.
    key_points: list[str] = []

# Characters removed from both ends of a line when parsing a bullet list.
_BULLET_CHARS = "-•* "


def _parse_key_points(raw: str) -> list[str]:
    """Split a bullet-list reply into clean, non-empty key points.

    Leading/trailing whitespace and bullet markers are removed from every
    line; lines that are empty afterwards are dropped. If nothing survives,
    the whole stripped reply is returned as a single point, or an empty list
    when the reply itself is blank.
    """
    points = []
    for line in raw.splitlines():
        text = line.strip().strip(_BULLET_CHARS).strip()
        if text:
            points.append(text)
    if points:
        return points

    logger.info('⚠️ No bullet points detected from GPT, using fallback.')
    fallback = raw.strip()
    return [fallback] if fallback else []


def _build_context(req: "QueryRequest") -> str:
    """Build the prompt context: crawled URL, else file text, else the bare query."""
    general = f"Answer the following using your general knowledge:\n\n{req.query}"
    if req.url:
        try:
            content = crawl_documentation(req.url)
        except Exception as e:
            # Best effort: a failed crawl degrades to a general-knowledge answer.
            logger.warning(f"⚠️ URL crawl failed: {e}")
            return general
        return f"{content}\n\nNow answer the user's question: {req.query}"
    if req.file_text:
        return f"{req.file_text}\n\nNow answer the user's question: {req.query}"
    return general


@app.post("/process", response_model=QueryResponse)
async def process_query(req: QueryRequest, background_tasks: BackgroundTasks):
    """Answer a query, optionally extracting key points and generating audio.

    Steps:
      1. If ``req.file_text`` is given, ask a ``KeyPointAgent`` for key points.
      2. Build the context from the crawled URL, the file text, or the query.
      3. Prefix the voice-specific tone, if ``get_voice_prompt_style`` gives one.
      4. Run the processor agent to obtain the answer.
      5. If ``req.voice`` is a key of ``voice_map``, generate audio and verify
         that the resulting mp3 exists and is at least 1000 bytes.

    Args:
        req: The validated request body.
        background_tasks: Injected by FastAPI; currently unused by this
            handler and kept so the signature is unchanged.

    Returns:
        QueryResponse with the answer, the audio key (``None`` when no usable
        audio was produced), an empty ``sources`` list and the key points.

    Raises:
        HTTPException: 500 when the agent returns no answer or any unexpected
            error occurs; the detail carries the error message.
    """
    start = datetime.now()
    try:
        logger.info(f"🧠 Processing query: {req.query}")
        logger.info(f"🌐 URL: {req.url}")
        logger.info(f"πŸ“Ž File text preview: {req.file_text[:100] if req.file_text else 'None'}")
        logger.info(f"πŸŽ™οΈ Voice: {req.voice}")

        key_points = []
        if req.file_text:
            from ai_agents import Agent
            extract_agent = Agent(
                name="KeyPointAgent",
                instructions="Extract the 5–7 most important key points from this content. Respond only as a bullet list.",
                model="gpt-4o"
            )
            # NOTE(review): assumes Agent.run returns the reply as a str — confirm.
            key_points_raw = await extract_agent.run(req.file_text)
            key_points = _parse_key_points(key_points_raw)
            logger.info(f'πŸ”Ž Final key points: {key_points}')

        context = _build_context(req)

        tone = get_voice_prompt_style(req.voice or "")
        if tone:
            context = tone + "\n\n" + context

        processor, _ = setup_agents()
        logger.info("🧠 Sending context to GPT")
        # NOTE(review): the result is sliced below, so it is assumed to be a str — confirm.
        answer = await Runner.run(processor, context)

        if not answer:
            raise HTTPException(status_code=500, detail="No GPT response.")

        logger.info(f"βœ… GPT returned: {answer[:100]}...")
        logger.info(f"πŸ€– GPT answer complete. ⏱️ {datetime.now() - start}")

        audio_key = None
        if req.voice and req.voice in voice_map:
            voice_id = voice_map[req.voice]
            audio_key = str(uuid.uuid4())

            generate_audio(answer, voice_id, audio_key)
            logger.info(f"πŸŽ™οΈ Audio generation triggered for voice: {req.voice}")

            # Only advertise the key if a plausibly complete file was written.
            output_path = os.path.join(AUDIO_DIR, f"{audio_key}.mp3")
            if not os.path.exists(output_path) or os.path.getsize(output_path) < 1000:
                logger.warning("πŸ›‘ Audio generation failed or file is too small.")
                audio_key = None
        elif req.voice:
            # A voice was requested but is not a known key; no voice at all is
            # a normal text-only request and is not worth a warning.
            logger.warning("πŸ›‘ Invalid voice")

        return QueryResponse(answer=answer, audio_key=audio_key, sources=[], key_points=key_points)

    except HTTPException:
        # Deliberate HTTP errors raised above must reach the client unchanged.
        raise
    except Exception as e:
        # logger.exception records the message together with the traceback.
        logger.exception(f"πŸ”₯ Internal error: {e}")
        raise HTTPException(status_code=500, detail=str(e)) from e

@app.get("/get-audio/{key}")
@app.head("/get-audio/{key}", include_in_schema=False)
async def get_audio(key: str, request: Request):
    """Stream the generated mp3 stored under ``key``.

    The route is registered for both GET and HEAD: ``@app.get`` alone does not
    accept HEAD requests, which left the HEAD branch below unreachable.

    Args:
        key: Audio key, i.e. the file name without the ``.mp3`` suffix.
        request: Incoming request, used to tell HEAD from GET.

    Returns:
        A StreamingResponse with media type ``audio/mpeg``; the body is empty
        for HEAD requests.

    Raises:
        HTTPException: 404 when the key contains a path component or no such
            audio file exists.
    """
    # A key with a directory part could make the lookup escape AUDIO_DIR.
    if os.path.basename(key) != key:
        raise HTTPException(status_code=404, detail="Audio not found")

    audio_path = os.path.join(AUDIO_DIR, f"{key}.mp3")
    if not os.path.isfile(audio_path):
        raise HTTPException(status_code=404, detail="Audio not found")

    if request.method == "HEAD":
        return StreamingResponse(iter([]), status_code=200, media_type="audio/mpeg")

    def iterfile(chunk_size: int = 64 * 1024):
        # Fixed-size reads: iterating a binary file splits on b"\n" bytes,
        # which yields arbitrarily small or large chunks for mp3 data.
        with open(audio_path, mode="rb") as file:
            while True:
                chunk = file.read(chunk_size)
                if not chunk:
                    break
                yield chunk

    return StreamingResponse(iterfile(), media_type="audio/mpeg")