Spaces:
Running
Running
Upload 5 files
Browse files- app.py +137 -0
- generate_audio.py +32 -0
- main.py +131 -0
- requirements.txt +9 -0
- utils.py +59 -0
app.py
ADDED
@@ -0,0 +1,137 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import requests
|
3 |
+
import time
|
4 |
+
import os
|
5 |
+
|
6 |
+
# Base URL of the FastAPI backend. Defaults to a local server; set the API_URL
# environment variable to point the UI at a different host.
API_URL = os.getenv("API_URL", "http://127.0.0.1:8000").rstrip("/")

# Number of "use_bullet_<i>" / "bullet_text_<i>" session-state slots that are
# inspected on every run.
MAX_BULLETS = 10

st.set_page_config(page_title="Voice Agent", layout="centered")
st.title(" Ask Me Anything β With Voice!")
st.caption("Summarized answers with expressive AI voices.")

# Pulse animation CSS used to highlight the query box after a bullet is loaded.
st.markdown("""
<style>
.pulse-box {
animation: pulse 1.5s ease-in-out;
border: 2px solid #4CAF50 !important;
border-radius: 0.5rem;
padding: 0.5rem;
}
@keyframes pulse {
0% { box-shadow: 0 0 0px rgba(0, 255, 0, 0.5); }
50% { box-shadow: 0 0 20px rgba(0, 255, 0, 0.9); }
100% { box-shadow: 0 0 0px rgba(0, 255, 0, 0.5); }
}
</style>
""", unsafe_allow_html=True)

# Display label -> voice name sent to the backend in the "voice" field.
voice_options = {
    "Grandma GG": "grandma GG",
    "Tech Wizard": "tech wizard",
    "Perky Sidekick": "perky sidekick",
    "Bill the Newscaster": "bill the newscaster",
    "Spunky Charlie": "spunky charlie",
    "Sassy Teen": "sassy teen",
}

st.sidebar.header("π Voice Settings")
voice_choice = st.sidebar.selectbox("Choose a voice:", list(voice_options))
voice = voice_options[voice_choice]

# Make sure the state keys read further down always exist.
for key in ["query", "url", "file_text"]:
    if key not in st.session_state:
        st.session_state[key] = ""

if st.button("π§Ή Clear All"):
    st.session_state.query = ""
    st.session_state.url = ""
    st.session_state.file_text = ""
    st.rerun()

# Bullet-to-query logic. It runs BEFORE the query widget is created because the
# "query" state key is assigned below, ahead of the text area that owns it.
for i in range(MAX_BULLETS):
    if st.session_state.get(f"use_bullet_{i}", False):
        st.session_state["load_bullet_text"] = st.session_state.get(f"bullet_text_{i}", "")
        st.rerun()

pulse = False
if "load_bullet_text" in st.session_state:
    st.session_state.query = st.session_state["load_bullet_text"]
    del st.session_state["load_bullet_text"]
    pulse = True

# Query box, bracketed by the pulse-box markup when a bullet was just loaded.
QUERY_LABEL = "π¬ Ask or refine something based on the bullets:"
if pulse:
    st.markdown('<div class="pulse-box">', unsafe_allow_html=True)
query = st.text_area(QUERY_LABEL, key="query")
if pulse:
    st.markdown("</div>", unsafe_allow_html=True)

url = st.text_input("π Optional URL to summarize:", placeholder="https://example.com", key="url")
uploaded_file = st.file_uploader("π Or upload a file (PDF, TXT, DOCX)", type=["pdf", "txt", "docx"])

if uploaded_file is not None:
    # NOTE(review): every upload is decoded as UTF-8 with undecodable bytes
    # dropped. PDF and DOCX uploads are accepted above but are not parsed here,
    # so their decoded text is presumably not meaningful - confirm and add real
    # extraction if those formats must be supported.
    file_text = uploaded_file.read().decode("utf-8", errors="ignore")
    st.session_state.file_text = file_text
    st.success("✅ File uploaded successfully!")
else:
    # No upload in this run: fall back to whatever text was stored earlier.
    file_text = st.session_state.get("file_text", "")
|
82 |
+
|
83 |
+
if st.button("π― Summarize"):
    with st.spinner("Generating response..."):
        try:
            payload = {
                "query": query,
                "url": url,
                "voice": voice,
                "file_text": file_text,
            }

            try:
                response = requests.post(f"{API_URL}/process", json=payload, timeout=15)
            except requests.exceptions.RequestException as req_err:
                st.error(f"π¨ Connection to backend failed:\n{req_err}")
                st.stop()

            if response.status_code != 200:
                st.error(f"β Backend error: {response.status_code} - {response.text}")
            else:
                data = response.json()

                if not query.strip() and data.get("key_points"):
                    # No question was asked: list the extracted key points and
                    # store each one so the "Use This" button can load it into
                    # the query box on the next run.
                    st.markdown("### π Key Points from File")
                    for i, point in enumerate(data["key_points"]):
                        st.markdown(f"- {point}")
                        st.session_state[f"bullet_text_{i}"] = point
                        st.button("βͺοΈ Use This", key=f"use_bullet_{i}")
                else:
                    st.markdown("### π Answer")
                    st.success(data["answer"])

                # Audio playback with retries.
                audio_key = data.get("audio_key")
                if audio_key:
                    audio_url = f"{API_URL}/get-audio/{audio_key}"
                    # One placeholder for the retry status, so repeated attempts
                    # overwrite a single message instead of stacking warnings.
                    status = st.empty()

                    for _ in range(10):
                        try:
                            # A timeout keeps a stalled backend from hanging the UI.
                            audio_check = requests.get(audio_url, timeout=10)
                        except requests.exceptions.RequestException as e:
                            status.warning(f"β οΈ Audio request failed: {e}")
                        else:
                            if audio_check.status_code != 200:
                                status.warning("β οΈ Couldn't reach audio endpoint.")
                            elif len(audio_check.content) > 2000:
                                # Payloads of 2000 bytes or fewer are treated as
                                # not-yet-complete audio.
                                status.empty()
                                st.audio(audio_check.content, format="audio/mp3")
                                break
                            else:
                                status.warning("β οΈ Audio not ready yet. Waiting...")
                        time.sleep(0.5)
                    else:
                        # The loop finished without a break: no usable audio.
                        status.warning("β οΈ Audio not ready or empty after retries.")
        except Exception as e:
            st.error(f"π₯ Unexpected error:\n{e}")
|
generate_audio.py
ADDED
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import logging
|
3 |
+
from elevenlabs import stream
|
4 |
+
from elevenlabs.client import ElevenLabs
|
5 |
+
|
6 |
+
# Directory that receives the generated "<audio_key>.mp3" files.
AUDIO_DIR = "audio_outputs"
logger = logging.getLogger(__name__)

# The API key is read from the ELEVENLABS_API_KEY environment variable.
client = ElevenLabs(api_key=os.getenv("ELEVENLABS_API_KEY"))


def generate_audio(text: str, voice_id: str, audio_key: str):
    """Stream speech for *text* into ``AUDIO_DIR/<audio_key>.mp3``.

    Args:
        text: The text to convert to speech.
        voice_id: Voice identifier passed to the text-to-speech call.
        audio_key: File stem of the MP3 that is written.

    Returns:
        None. Failures are logged rather than raised, so a caller detects
        success by checking that the output file exists.
    """
    output_path = os.path.join(AUDIO_DIR, f"{audio_key}.mp3")
    # Stream into a sibling file and move it into place only once complete, so
    # a failed run never leaves a truncated MP3 at the final path.
    partial_path = output_path + ".part"

    try:
        logger.info("π― Starting ElevenLabs audio generation")

        audio_stream = client.text_to_speech.convert_as_stream(
            text=text,
            voice_id=voice_id,
            model_id="eleven_multilingual_v2",
        )

        os.makedirs(AUDIO_DIR, exist_ok=True)

        with open(partial_path, "wb") as f:
            for chunk in audio_stream:
                # Only bytes chunks are written; anything else is skipped.
                if isinstance(chunk, bytes):
                    f.write(chunk)

        os.replace(partial_path, output_path)
        logger.info(f"✅ Audio saved to {output_path}")

    except Exception as e:
        # Best-effort by design: log (with traceback) and return normally.
        logger.exception(f"π₯ Error generating audio: {e}")
        try:
            os.remove(partial_path)
        except OSError:
            # No partial file was created, or it is already gone.
            pass
|
main.py
ADDED
@@ -0,0 +1,131 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import uuid
|
3 |
+
import logging
|
4 |
+
from datetime import datetime
|
5 |
+
from pathlib import Path
|
6 |
+
from fastapi import FastAPI, HTTPException, BackgroundTasks, Request
|
7 |
+
from fastapi.responses import StreamingResponse
|
8 |
+
from pydantic import BaseModel
|
9 |
+
|
10 |
+
from utils import (
|
11 |
+
sanitize_url,
|
12 |
+
crawl_documentation,
|
13 |
+
get_voice_prompt_style,
|
14 |
+
voice_map,
|
15 |
+
)
|
16 |
+
from ai_agents import Runner, setup_agents
|
17 |
+
from generate_audio import generate_audio
|
18 |
+
|
19 |
+
app = FastAPI()

# Directory holding the generated MP3 files; created at import time so the
# request handlers can rely on it existing.
AUDIO_DIR = "audio_outputs"
Path(AUDIO_DIR).mkdir(parents=True, exist_ok=True)

logging.basicConfig(
    filename="voice_agent.log",
    # "w" truncates the log file every time the module is loaded.
    filemode="w",
    # Log messages in this module contain non-ASCII characters, so the file
    # encoding is pinned instead of depending on the platform default.
    encoding="utf-8",
    format="%(asctime)s | %(levelname)s | %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger(__name__)
|
30 |
+
|
31 |
+
from typing import Optional


class QueryRequest(BaseModel):
    """Request body accepted by the ``/process`` endpoint."""

    # The user's question.
    query: str
    # The remaining fields default to None, so they are declared Optional to
    # make the annotation agree with the default.
    url: Optional[str] = None
    voice: Optional[str] = None
    file_text: Optional[str] = None


class QueryResponse(BaseModel):
    """Response body returned by the ``/process`` endpoint."""

    # The generated answer text.
    answer: str
    # Key for fetching the audio via /get-audio/<key>; None when no audio exists.
    audio_key: Optional[str] = None
    sources: list = []
    # Key points extracted from the submitted file text, if any.
    key_points: list[str] = []
|
44 |
+
|
45 |
+
@app.post("/process", response_model=QueryResponse)
async def process_query(req: QueryRequest, background_tasks: BackgroundTasks):
    """Answer a query, optionally grounded in a URL or file text, and voice it.

    Steps:
      1. When ``file_text`` is present, ask a key-point agent for a bullet list
         and turn it into ``key_points``.
      2. Build the prompt context from the crawled URL, else the file text,
         else the bare query, and prefix it with the voice's tone.
      3. Run the processor agent on that context to get the answer.
      4. When ``voice`` is a key of ``voice_map``, generate an MP3 and return
         its key, or None if no file of at least 1000 bytes was produced.

    Args:
        req: The request payload.
        background_tasks: Accepted for interface compatibility; not used.

    Returns:
        A ``QueryResponse`` with the answer, optional audio key and key points.

    Raises:
        HTTPException: 500 when no answer is produced or on any internal error.
    """
    # Local import: asyncio.to_thread moves the blocking crawl and TTS calls
    # off the event loop while this handler still waits for their results.
    import asyncio

    try:
        start = datetime.now()
        logger.info(f"π§ Processing query: {req.query}")
        logger.info(f"π URL: {req.url}")
        logger.info(f"π File text preview: {req.file_text[:100] if req.file_text else 'None'}")
        logger.info(f"ποΈ Voice: {req.voice}")

        key_points = []
        if req.file_text:
            from ai_agents import Agent
            extract_agent = Agent(
                name="KeyPointAgent",
                instructions="Extract the 5β7 most important key points from this content. Respond only as a bullet list.",
                model="gpt-4o"
            )
            key_points_raw = await extract_agent.run(req.file_text)
            # Strip bullet markers ('-', '•', '*') and padding from each line,
            # and drop lines that are empty once the markers are removed.
            stripped_lines = (line.strip('-•* ').strip() for line in key_points_raw.splitlines())
            key_points = [point for point in stripped_lines if point]
            if not key_points:
                logger.info('β οΈ No bullet points detected from GPT, using fallback.')
                key_points = [key_points_raw.strip()]
            logger.info(f'π Final key points: {key_points}')

        if req.url:
            try:
                # Add a scheme when the user typed a bare host name.
                target_url = sanitize_url(req.url)
                content = await asyncio.to_thread(crawl_documentation, target_url)
                context = f"{content}\n\nNow answer the user's question: {req.query}"
            except Exception as e:
                logger.warning(f"β οΈ URL crawl failed: {e}")
                context = f"Answer the following using your general knowledge:\n\n{req.query}"
        elif req.file_text:
            context = f"{req.file_text}\n\nNow answer the user's question: {req.query}"
        else:
            context = f"Answer the following using your general knowledge:\n\n{req.query}"

        tone = get_voice_prompt_style(req.voice or "")
        if tone:
            context = tone + "\n\n" + context

        processor, _ = setup_agents()
        logger.info("π§ Sending context to GPT")
        answer = await Runner.run(processor, context)

        if not answer:
            raise HTTPException(status_code=500, detail="No GPT response.")

        logger.info(f"✅ GPT returned: {answer[:100]}...")
        logger.info(f"π€ GPT answer complete. β±οΈ {datetime.now() - start}")

        audio_key = None
        if req.voice and req.voice in voice_map:
            voice_id = voice_map[req.voice]
            audio_key = str(uuid.uuid4())

            # generate_audio is synchronous; run it in a worker thread and wait,
            # because the file check below needs the finished result.
            await asyncio.to_thread(generate_audio, answer, voice_id, audio_key)
            logger.info(f"ποΈ Audio generation triggered for voice: {req.voice}")

            # Check that the audio file actually exists and is not tiny.
            output_path = os.path.join(AUDIO_DIR, f"{audio_key}.mp3")
            if not os.path.exists(output_path) or os.path.getsize(output_path) < 1000:
                logger.warning("π Audio generation failed or file is too small.")
                audio_key = None
        else:
            logger.warning("π Invalid voice")

        return QueryResponse(answer=answer, audio_key=audio_key, sources=[], key_points=key_points)

    except HTTPException:
        # Raised deliberately above; pass it through instead of re-wrapping it.
        raise
    except Exception as e:
        # logger.exception records the message together with the traceback.
        logger.exception(f"π₯ Internal error: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e)) from e
|
118 |
+
|
119 |
+
@app.get("/get-audio/{key}")
async def get_audio(key: str, request: Request):
    """Stream the MP3 stored as ``AUDIO_DIR/<key>.mp3``.

    Args:
        key: File stem of the audio file, taken from the URL path.
        request: The incoming request; only its HTTP method is inspected.

    Returns:
        An empty 200 response for HEAD requests, otherwise a streaming
        ``audio/mpeg`` response with the file contents.

    Raises:
        HTTPException: 404 when the key contains a path separator or no such
            file exists.
    """
    # The key comes straight from the URL and is joined into a file path, so
    # anything that could address a different directory is refused.
    if not key or "/" in key or "\\" in key:
        raise HTTPException(status_code=404, detail="Audio not found")

    audio_path = os.path.join(AUDIO_DIR, f"{key}.mp3")
    if not os.path.isfile(audio_path):
        raise HTTPException(status_code=404, detail="Audio not found")

    if request.method == "HEAD":
        return StreamingResponse(iter([]), status_code=200)

    def iterfile(chunk_size=64 * 1024):
        # Read fixed-size chunks; iterating a binary file directly would split
        # the audio data on newline bytes.
        with open(audio_path, mode="rb") as file:
            while chunk := file.read(chunk_size):
                yield chunk

    return StreamingResponse(iterfile(), media_type="audio/mpeg")
|
requirements.txt
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
streamlit
fastapi
uvicorn
qdrant-client
openai
fastembed
firecrawl
python-dotenv
requests
# Imported by generate_audio.py (from elevenlabs.client import ElevenLabs).
elevenlabs
# Imported by utils.py (from bs4 import BeautifulSoup).
beautifulsoup4
|
utils.py
ADDED
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import re
|
3 |
+
from urllib.parse import urlparse
|
4 |
+
from bs4 import BeautifulSoup
|
5 |
+
|
6 |
+
# Directory used by save_audio_file for generated audio.
AUDIO_DIR = "audio_outputs"

# Voice name -> voice identifier string.
# NOTE(review): presumably ElevenLabs voice IDs - confirm against the caller.
voice_map = {
    "grandma GG": "rKVm0Cb9J2wrzmZupJea",
    "tech wizard": "ocn9CucaUfmmP6Two6Ik",
    "perky sidekick": "DWR3ijzKmphlRUhbBI7t",
    "bill the newscaster": "R1vZMopVRO75M5xBKX52",
    "spunky charlie": "q3yXDjF0aq4JCEo9u2g4",
    "sassy teen": "mBj2IDD9aXruPJHLGCAv",
}
|
9 |
+
|
10 |
+
def sanitize_url(url):
    """Return *url* with an explicit scheme, defaulting to ``https://``.

    Surrounding whitespace is removed and the scheme check ignores case, so
    ``"HTTP://host"`` is left as it is. Empty or ``None`` input yields ``""``
    instead of a bare ``"https://"``.

    Args:
        url: The URL text as entered by the user; may be empty or ``None``.

    Returns:
        The trimmed URL, prefixed with ``https://`` when it had no http(s)
        scheme, or ``""`` when there was nothing to sanitize.
    """
    cleaned = (url or "").strip()
    if not cleaned:
        return ""
    if not cleaned.lower().startswith(("http://", "https://")):
        return "https://" + cleaned
    return cleaned
|
14 |
+
|
15 |
+
def extract_internal_links(html_content, base_url):
    """Collect the links in *html_content* that stay on *base_url*'s host.

    Every ``<a href>`` is resolved against *base_url*, so relative paths,
    root-relative paths and protocol-relative links all become absolute URLs.
    Only http(s) links whose host equals the base host are kept, which
    excludes schemes such as ``mailto:`` and ``javascript:``.

    Args:
        html_content: HTML markup to scan.
        base_url: URL of the page the markup came from.

    Returns:
        A list of unique absolute URLs, in no particular order.
    """
    # Local import: only urlparse is imported at module level.
    from urllib.parse import urljoin

    soup = BeautifulSoup(html_content, "html.parser")
    base_domain = urlparse(base_url).netloc

    links = set()
    for tag in soup.find_all("a", href=True):
        absolute = urljoin(base_url, tag["href"].strip())
        parsed = urlparse(absolute)
        if parsed.scheme in ("http", "https") and parsed.netloc == base_domain:
            links.add(absolute)

    return list(links)
|
32 |
+
|
33 |
+
def crawl_documentation(url):
    """Fetch *url* and return the response body as text.

    Args:
        url: Absolute URL of the page to download.

    Returns:
        The response text.

    Raises:
        RuntimeError: When the request fails or the server answers with an
            error status. The failure is raised rather than returned as a
            string so that callers cannot mistake the error message for page
            content.
    """
    import requests

    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()
    except requests.RequestException as err:
        raise RuntimeError(f"Error fetching page: {err}") from err
    return response.text
|
41 |
+
|
42 |
+
def get_voice_prompt_style(voice):
    """Return the tone description for *voice*, or ``"neutral"`` if unknown.

    The comparison ignores case and surrounding whitespace. The table contains
    a mixed-case key (``'grandma GG'``), so both sides are lower-cased before
    comparing; lower-casing only the input would never match that entry.

    Args:
        voice: Voice name; empty or ``None`` is treated as unknown.

    Returns:
        The tone description string, or ``"neutral"`` when there is no match.
    """
    tone = {
        'grandma GG': 'dry, witty, and brutally honest β will roast you if you mess up.',
        'tech wizard': 'cryptic, snarky, and a prodigy with code β speaks in digital spells.',
        'perky sidekick': 'energetic, cheerful, and endlessly supportive β like a high-five machine.',
        'bill the newscaster': 'polished, confident, and composed β delivers everything like breaking news.',
        'spunky charlie': 'wildly curious, playful, and full of devil-may-care energy.',
        'sassy teen': 'sarcastic, sharp-tongued, and too cool to care β flexes brainpower with attitude.',
    }
    by_lower_name = {name.lower(): style for name, style in tone.items()}
    return by_lower_name.get((voice or "").strip().lower(), "neutral")
|
45 |
+
|
46 |
+
def save_audio_file(audio_path, content):
    """Write *content* to *audio_path*, creating missing directories first.

    Args:
        audio_path: Destination file path.
        content: Bytes to write.
    """
    os.makedirs(AUDIO_DIR, exist_ok=True)
    # The destination is not required to be inside AUDIO_DIR, so its own parent
    # directory is created as well.
    parent_dir = os.path.dirname(audio_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    with open(audio_path, "wb") as f:
        f.write(content)
|
50 |
+
|
51 |
+
__all__ = [
|
52 |
+
"sanitize_url",
|
53 |
+
"extract_internal_links",
|
54 |
+
"crawl_documentation",
|
55 |
+
"get_voice_prompt_style",
|
56 |
+
"save_audio_file",
|
57 |
+
"voice_map",
|
58 |
+
"AUDIO_DIR",
|
59 |
+
]
|