Bonosa2 committed on
Commit
6a9fed6
·
verified ·
1 Parent(s): b053f6c

Upload 5 files

Browse files
Files changed (5) hide show
  1. app.py +137 -0
  2. generate_audio.py +32 -0
  3. main.py +131 -0
  4. requirements.txt +9 -0
  5. utils.py +59 -0
app.py ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import requests
3
+ import time
4
+ import os
5
+
6
+ API_URL = "http://127.0.0.1:8000"
7
+
8
+ st.set_page_config(page_title="Voice Agent", layout="centered")
9
+ st.title(" Ask Me Anything – With Voice!")
10
+ st.caption("Summarized answers with expressive AI voices.")
11
+
12
+ # 💚 Pulse animation CSS
13
+ st.markdown("""
14
+ <style>
15
+ .pulse-box {
16
+ animation: pulse 1.5s ease-in-out;
17
+ border: 2px solid #4CAF50 !important;
18
+ border-radius: 0.5rem;
19
+ padding: 0.5rem;
20
+ }
21
+ @keyframes pulse {
22
+ 0% { box-shadow: 0 0 0px rgba(0, 255, 0, 0.5); }
23
+ 50% { box-shadow: 0 0 20px rgba(0, 255, 0, 0.9); }
24
+ 100% { box-shadow: 0 0 0px rgba(0, 255, 0, 0.5); }
25
+ }
26
+ </style>
27
+ """, unsafe_allow_html=True)
28
+
29
+ voice_options = {
30
+ "Grandma GG": "grandma GG",
31
+ "Tech Wizard": "tech wizard",
32
+ "Perky Sidekick": "perky sidekick",
33
+ "Bill the Newscaster": "bill the newscaster",
34
+ "Spunky Charlie": "spunky charlie",
35
+ "Sassy Teen": "sassy teen"
36
+ }
37
+
38
+ st.sidebar.header("πŸ”Š Voice Settings")
39
+ voice_choice = st.sidebar.selectbox("Choose a voice:", list(voice_options.keys()))
40
+ voice = voice_options[voice_choice]
41
+
42
# Seed the three text fields in session state so later reads never miss a key.
for field in ("query", "url", "file_text"):
    if field not in st.session_state:
        st.session_state[field] = ""

# "Clear All" blanks every text field and restarts the script run.
if st.button("🧹 Clear All"):
    for field in ("query", "url", "file_text"):
        st.session_state[field] = ""
    st.rerun()

# A clicked "Use This" button shows up as a truthy use_bullet_<n> entry.
# Stash the matching bullet text and rerun, so the query widget (created
# further down) can be pre-filled before it is instantiated.
for slot in range(10):
    if st.session_state.get(f"use_bullet_{slot}", False):
        st.session_state["load_bullet_text"] = st.session_state.get(f"bullet_text_{slot}", "")
        st.rerun()

# Consume the stashed bullet text exactly once; `pulse` records that the query
# box was just filled from a bullet.
pulse = "load_bullet_text" in st.session_state
if pulse:
    st.session_state.query = st.session_state["load_bullet_text"]
    del st.session_state["load_bullet_text"]
63
+
64
# Query box. When a bullet was just loaded (`pulse`), the widget is bracketed
# by the opening and closing pulse-box <div> markup.
query_label = "πŸ’¬ Ask or refine something based on the bullets:"
if pulse:
    st.markdown('<div class="pulse-box">', unsafe_allow_html=True)
query = st.text_area(query_label, key="query")
if pulse:
    st.markdown("</div>", unsafe_allow_html=True)

url = st.text_input("🌐 Optional URL to summarize:", placeholder="https://example.com", key="url")
uploaded_file = st.file_uploader("πŸ“Ž Or upload a file (PDF, TXT, DOCX)", type=["pdf", "txt", "docx"])

if uploaded_file is None:
    # No upload in this run: fall back to whatever text was stored earlier.
    file_text = st.session_state.get("file_text", "")
else:
    # NOTE(review): every accepted type is decoded as UTF-8 with undecodable
    # bytes dropped; PDF and DOCX are binary formats, so this presumably
    # yields little usable text for them - confirm and add real extraction.
    file_text = uploaded_file.read().decode("utf-8", errors="ignore")
    st.session_state.file_text = file_text
    st.success("βœ… File uploaded successfully!")
82
+
83
# Summarize: send the inputs to the backend, show either the extracted key
# points or the answer, then fetch and play the generated audio.
if st.button("🎯 Summarize"):
    with st.spinner("Generating response..."):
        try:
            payload = {
                "query": query,
                "url": url,
                "voice": voice,
                "file_text": file_text,
            }

            try:
                response = requests.post(f"{API_URL}/process", json=payload, timeout=15)
            except requests.exceptions.RequestException as req_err:
                st.error(f"🚨 Connection to backend failed:\n{req_err}")
                st.stop()

            if response.status_code != 200:
                st.error(f"❌ Backend error: {response.status_code} - {response.text}")
            else:
                data = response.json()

                if not query.strip() and data.get("key_points"):
                    # No question was asked: list the key points, each with a
                    # button whose click is picked up at the top of the script
                    # through the use_bullet_<i> / bullet_text_<i> state keys.
                    st.markdown("### πŸ“‹ Key Points from File")
                    for i, point in enumerate(data["key_points"]):
                        st.markdown(f"- {point}")
                        st.session_state[f"bullet_text_{i}"] = point
                        st.button("β†ͺ️ Use This", key=f"use_bullet_{i}")
                else:
                    st.markdown("### πŸ“œ Answer")
                    st.success(data["answer"])

                # Audio playback with retries.
                audio_key = data.get("audio_key")
                if audio_key:
                    audio_url = f"{API_URL}/get-audio/{audio_key}"
                    max_attempts = 10

                    for attempt in range(max_attempts):
                        try:
                            # A timeout keeps a stalled backend from freezing
                            # the page inside the spinner indefinitely.
                            audio_check = requests.get(audio_url, timeout=10)
                        except requests.exceptions.RequestException as audio_err:
                            st.warning(f"⚠️ Audio request failed: {audio_err}")
                        else:
                            if audio_check.status_code != 200:
                                st.warning("⚠️ Couldn't reach audio endpoint.")
                            elif len(audio_check.content) > 2000:
                                # Only payloads above 2000 bytes are treated
                                # as a finished clip.
                                st.audio(audio_check.content, format="audio/mp3")
                                break
                            else:
                                st.warning("⚠️ Audio not ready yet. Waiting...")

                        # Pause between attempts, but not after the last one.
                        if attempt < max_attempts - 1:
                            time.sleep(0.5)
                    else:
                        st.warning("⚠️ Audio not ready or empty after retries.")
        except Exception as e:
            st.error(f"πŸ”₯ Unexpected error:\n{e}")
generate_audio.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import logging
3
+ from elevenlabs import stream
4
+ from elevenlabs.client import ElevenLabs
5
+
6
+ AUDIO_DIR = "audio_outputs"
7
+ logger = logging.getLogger(__name__)
8
+
9
+ client = ElevenLabs(api_key=os.getenv("ELEVENLABS_API_KEY"))
10
+
11
def generate_audio(text: str, voice_id: str, audio_key: str):
    """Synthesize *text* with ElevenLabs and save it as ``<audio_key>.mp3``.

    The audio is streamed from ``client.text_to_speech.convert_as_stream``
    into a temporary ``.part`` file inside ``AUDIO_DIR`` and renamed to its
    final name only after the whole stream has been written, so a reader of
    ``AUDIO_DIR/<audio_key>.mp3`` never sees a half-written file.

    Args:
        text: Text to speak.
        voice_id: ElevenLabs voice identifier.
        audio_key: Base name (without extension) of the output file.

    Returns:
        None. Failures are logged rather than raised; callers can detect them
        by the output file being absent.
    """
    output_path = os.path.join(AUDIO_DIR, f"{audio_key}.mp3")
    partial_path = f"{output_path}.part"

    try:
        logger.info("🎯 Starting ElevenLabs audio generation")

        audio_stream = client.text_to_speech.convert_as_stream(
            text=text,
            voice_id=voice_id,
            model_id="eleven_multilingual_v2",
        )

        os.makedirs(AUDIO_DIR, exist_ok=True)

        with open(partial_path, "wb") as f:
            for chunk in audio_stream:
                # Only byte chunks are written; anything else the stream
                # yields is skipped.
                if isinstance(chunk, bytes):
                    f.write(chunk)

        # Atomic publish: the final name appears only once the file is complete.
        os.replace(partial_path, output_path)
        logger.info(f"βœ… Audio saved to {output_path}")

    except Exception as e:
        # Best-effort by design: log (with traceback) and return normally.
        logger.exception(f"πŸ”₯ Error generating audio: {e}")
        try:
            os.remove(partial_path)
        except OSError:
            # Nothing to clean up, or cleanup itself failed; either way the
            # final .mp3 was never published.
            pass
main.py ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import uuid
3
+ import logging
4
+ from datetime import datetime
5
+ from pathlib import Path
6
+ from fastapi import FastAPI, HTTPException, BackgroundTasks, Request
7
+ from fastapi.responses import StreamingResponse
8
+ from pydantic import BaseModel
9
+
10
+ from utils import (
11
+ sanitize_url,
12
+ crawl_documentation,
13
+ get_voice_prompt_style,
14
+ voice_map,
15
+ )
16
+ from ai_agents import Runner, setup_agents
17
+ from generate_audio import generate_audio
18
+
19
# Directory holding the generated MP3 files; created at import time.
AUDIO_DIR = "audio_outputs"
Path("audio_outputs").mkdir(parents=True, exist_ok=True)

app = FastAPI()

# Log to voice_agent.log; filemode "w" starts a fresh file on every start.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s | %(levelname)s | %(message)s",
    filename="voice_agent.log",
    filemode="w",
)
logger = logging.getLogger(__name__)
30
+
31
from typing import Optional


class QueryRequest(BaseModel):
    """Request body for ``POST /process``."""

    # The user's question; may be an empty string when only a file is sent.
    query: str
    # Optional inputs. Declared Optional so that an explicit JSON null is
    # accepted as well as an omitted field.
    url: Optional[str] = None
    voice: Optional[str] = None
    file_text: Optional[str] = None


class QueryResponse(BaseModel):
    """Response body for ``POST /process``."""

    answer: str
    # Key for GET /get-audio/{key}; None when no audio was produced.
    audio_key: Optional[str] = None
    sources: list = []
    key_points: list[str] = []
44
+
45
@app.post("/process", response_model=QueryResponse)
async def process_query(req: QueryRequest, background_tasks: BackgroundTasks):
    """Answer ``req.query`` and, for a known voice, synthesize the answer.

    Steps:
      1. If ``req.file_text`` is set, ask a "KeyPointAgent" for key points
         and split its reply into a list of strings.
      2. Build the prompt context from the crawled URL, else the file text,
         else the bare query, and prepend the voice tone returned by
         ``get_voice_prompt_style``.
      3. Run the processor agent on that context.
      4. If ``req.voice`` is a key of ``voice_map``, generate an MP3 and
         return its key; the key is dropped when the file is missing or
         smaller than 1000 bytes.

    ``background_tasks`` is kept for interface compatibility but is unused:
    step 4 checks the output file, so generation must have finished first.

    Raises:
        HTTPException: status 500 when the agent returns nothing or any
            other step fails.
    """
    import asyncio  # local import keeps this block self-contained

    try:
        start = datetime.now()
        logger.info(f"🧠 Processing query: {req.query}")
        logger.info(f"🌐 URL: {req.url}")
        logger.info(f"πŸ“Ž File text preview: {req.file_text[:100] if req.file_text else 'None'}")
        logger.info(f"πŸŽ™οΈ Voice: {req.voice}")

        key_points = []
        if req.file_text:
            from ai_agents import Agent
            extract_agent = Agent(
                name="KeyPointAgent",
                instructions="Extract the 5–7 most important key points from this content. Respond only as a bullet list.",
                model="gpt-4o"
            )
            # NOTE(review): the main agent below is driven through
            # Runner.run(agent, text); confirm that Agent also exposes an
            # awaitable run() returning a string.
            key_points_raw = await extract_agent.run(req.file_text)
            # Strip list markers ("-", "\u2022", "*") and padding from each
            # line, then drop lines that end up empty.
            stripped_lines = (
                line.strip('-\u2022* ').strip() for line in key_points_raw.splitlines()
            )
            key_points = [point for point in stripped_lines if point]
            if not key_points:
                logger.info('⚠️ No bullet points detected from GPT, using fallback.')
                key_points = [key_points_raw.strip()]
            logger.info(f'πŸ”Ž Final key points: {key_points}')

        if req.url:
            try:
                # Accept scheme-less input such as "example.com".
                content = crawl_documentation(sanitize_url(req.url))
                # crawl_documentation reports failures as a returned string
                # with this prefix instead of raising; route that to the
                # fallback rather than passing it off as page content.
                if content.startswith("Error fetching page:"):
                    raise RuntimeError(content)
                context = f"{content}\n\nNow answer the user's question: {req.query}"
            except Exception as e:
                logger.warning(f"⚠️ URL crawl failed: {e}")
                context = f"Answer the following using your general knowledge:\n\n{req.query}"
        elif req.file_text:
            context = f"{req.file_text}\n\nNow answer the user's question: {req.query}"
        else:
            context = f"Answer the following using your general knowledge:\n\n{req.query}"

        tone = get_voice_prompt_style(req.voice or "")
        if tone:
            context = tone + "\n\n" + context

        processor, _ = setup_agents()
        logger.info("🧠 Sending context to GPT")
        answer = await Runner.run(processor, context)

        if not answer:
            raise HTTPException(status_code=500, detail="No GPT response.")

        logger.info(f"βœ… GPT returned: {answer[:100]}...")
        logger.info(f"πŸ€– GPT answer complete. ⏱️ {datetime.now() - start}")

        audio_key = None
        if req.voice and req.voice in voice_map:
            voice_id = voice_map[req.voice]
            audio_key = str(uuid.uuid4())

            # generate_audio is a blocking call; run it in a worker thread so
            # the event loop keeps serving other requests, and await it so
            # the file check below sees the finished result.
            await asyncio.to_thread(generate_audio, answer, voice_id, audio_key)
            logger.info(f"πŸŽ™οΈ Audio generation triggered for voice: {req.voice}")

            # Only hand out the key if a plausible audio file was written.
            output_path = os.path.join(AUDIO_DIR, f"{audio_key}.mp3")
            if not os.path.exists(output_path) or os.path.getsize(output_path) < 1000:
                logger.warning("πŸ›‘ Audio generation failed or file is too small.")
                audio_key = None
        else:
            logger.warning("πŸ›‘ Invalid voice")

        return QueryResponse(answer=answer, audio_key=audio_key, sources=[], key_points=key_points)

    except HTTPException:
        # Deliberate HTTP errors pass through with their status and detail.
        raise
    except Exception as e:
        # logger.exception records the message together with the traceback.
        logger.exception(f"πŸ”₯ Internal error: {e}")
        raise HTTPException(status_code=500, detail=str(e)) from e
118
+
119
@app.get("/get-audio/{key}")
async def get_audio(key: str, request: Request):
    """Stream the MP3 stored under *key* in ``AUDIO_DIR``.

    Args:
        key: Audio key as returned by ``/process``.
        request: Incoming request, inspected only for its HTTP method.

    Raises:
        HTTPException: 404 when *key* is not a UUID or no such file exists.
    """
    # Audio keys are minted with uuid.uuid4(); rejecting anything that does
    # not parse as a UUID keeps caller-controlled text such as path
    # separators or ".." out of the filesystem path built below.
    try:
        uuid.UUID(key)
    except ValueError:
        raise HTTPException(status_code=404, detail="Audio not found") from None

    audio_path = os.path.join(AUDIO_DIR, f"{key}.mp3")
    if not os.path.exists(audio_path):
        raise HTTPException(status_code=404, detail="Audio not found")

    # NOTE(review): the route is registered with @app.get only; confirm that
    # HEAD requests can actually reach this branch.
    if request.method == "HEAD":
        return StreamingResponse(iter([]), status_code=200)

    def iterfile():
        # Fixed-size binary chunks; iterating the file object directly would
        # split the audio wherever a newline byte happens to occur.
        with open(audio_path, mode="rb") as file:
            while chunk := file.read(64 * 1024):
                yield chunk

    return StreamingResponse(iterfile(), media_type="audio/mpeg")
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ streamlit
2
+ fastapi
3
+ uvicorn
4
+ qdrant-client
5
+ openai
6
+ fastembed
7
+ firecrawl
8
+ python-dotenv
9
+ requests
utils.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import re
3
+ from urllib.parse import urlparse
4
+ from bs4 import BeautifulSoup
5
+
6
+ AUDIO_DIR = "audio_outputs"
7
+
8
+ voice_map = {'grandma GG': 'rKVm0Cb9J2wrzmZupJea', 'tech wizard': 'ocn9CucaUfmmP6Two6Ik', 'perky sidekick': 'DWR3ijzKmphlRUhbBI7t', 'bill the newscaster': 'R1vZMopVRO75M5xBKX52', 'spunky charlie': 'q3yXDjF0aq4JCEo9u2g4', 'sassy teen': 'mBj2IDD9aXruPJHLGCAv'}
9
+
10
def sanitize_url(url):
    """Return *url* with ``https://`` prepended when it carries no scheme.

    Surrounding whitespace is removed first. The scheme test is
    case-insensitive, so ``HTTP://host`` is left untouched. Empty or missing
    input yields an empty string instead of a bare ``"https://"``.

    Args:
        url: URL text as entered by a user; may be empty or ``None``.

    Returns:
        The cleaned URL, or ``""`` when there was nothing to clean.
    """
    if not url:
        return ""

    cleaned = url.strip()
    if not cleaned:
        return ""

    if cleaned.lower().startswith(("http://", "https://")):
        return cleaned
    return "https://" + cleaned
14
+
15
def extract_internal_links(html_content, base_url):
    """Return the distinct same-host http(s) links found in *html_content*.

    Each ``<a href>`` is resolved against *base_url* with ``urljoin``, so
    relative paths (``page.html``, ``../x``), root-relative paths (``/x``)
    and protocol-relative links (``//host/x``) all become absolute URLs.
    Links with another scheme (``mailto:``, ``javascript:`` ...) or another
    host are dropped.

    Args:
        html_content: HTML markup to scan.
        base_url: URL of the page the markup came from.

    Returns:
        A list of absolute URLs, in no particular order.
    """
    from urllib.parse import urljoin  # local import keeps the block self-contained

    soup = BeautifulSoup(html_content, "html.parser")
    base_domain = urlparse(base_url).netloc

    links = set()
    for tag in soup.find_all("a", href=True):
        absolute = urljoin(base_url, tag["href"].strip())
        parsed = urlparse(absolute)
        if parsed.scheme in ("http", "https") and parsed.netloc == base_domain:
            links.add(absolute)

    return list(links)
32
+
33
def crawl_documentation(url):
    """Fetch *url* and return the response body as text.

    The request uses a 10-second timeout. Any failure, including a non-2xx
    status, is reported by returning a string that starts with
    ``"Error fetching page: "`` rather than by raising.
    """
    import requests

    try:
        page = requests.get(url, timeout=10)
        page.raise_for_status()
        body = page.text
    except Exception as exc:
        return f"Error fetching page: {exc}"
    return body
41
+
42
def get_voice_prompt_style(voice):
    """Return the tone description for *voice*, or ``"neutral"`` if unknown.

    Matching ignores case and surrounding whitespace. The table key
    ``'grandma GG'`` contains capitals, so both sides of the comparison are
    case-folded; lower-casing only the input would never match that entry.

    Args:
        voice: Voice name such as ``"tech wizard"``; may be empty or ``None``.

    Returns:
        The tone text for the voice, or ``"neutral"`` when there is no match.
    """
    tone = {
        'grandma GG': 'dry, witty, and brutally honest β€” will roast you if you mess up.',
        'tech wizard': 'cryptic, snarky, and a prodigy with code β€” speaks in digital spells.',
        'perky sidekick': 'energetic, cheerful, and endlessly supportive β€” like a high-five machine.',
        'bill the newscaster': 'polished, confident, and composed β€” delivers everything like breaking news.',
        'spunky charlie': 'wildly curious, playful, and full of devil-may-care energy.',
        'sassy teen': 'sarcastic, sharp-tongued, and too cool to care β€” flexes brainpower with attitude.',
    }
    wanted = (voice or "").strip().casefold()
    for name, style in tone.items():
        if name.casefold() == wanted:
            return style
    return "neutral"
45
+
46
def save_audio_file(audio_path, content):
    """Write *content* to *audio_path*, creating missing directories first.

    ``AUDIO_DIR`` is always created, as before. The directory that
    *audio_path* itself points into is created too, so a path outside
    ``AUDIO_DIR`` no longer fails with ``FileNotFoundError``.

    Args:
        audio_path: Destination file path.
        content: Bytes to write.
    """
    os.makedirs(AUDIO_DIR, exist_ok=True)

    parent_dir = os.path.dirname(audio_path)
    if parent_dir:  # empty for a bare filename in the current directory
        os.makedirs(parent_dir, exist_ok=True)

    with open(audio_path, "wb") as f:
        f.write(content)
50
+
51
+ __all__ = [
52
+ "sanitize_url",
53
+ "extract_internal_links",
54
+ "crawl_documentation",
55
+ "get_voice_prompt_style",
56
+ "save_audio_file",
57
+ "voice_map",
58
+ "AUDIO_DIR",
59
+ ]