Spaces:

Bonosa2
/

AI-TALKS-BACK

Running

App Files Files Community

Bonosa2 committed on Apr 19

Commit

6a9fed6

verified ·

1 Parent(s): b053f6c

Upload 5 files

Browse files

Files changed (5) hide show

app.py +137 -0
generate_audio.py +32 -0
main.py +131 -0
requirements.txt +9 -0
utils.py +59 -0

app.py ADDED Viewed

	@@ -0,0 +1,137 @@

+import streamlit as st
+import requests
+import time
+import os
+API_URL = "http://127.0.0.1:8000"
+st.set_page_config(page_title="Voice Agent", layout="centered")
+st.title(" Ask Me Anything – With Voice!")
+st.caption("Summarized answers with expressive AI voices.")
+# 💚 Pulse animation CSS
+st.markdown("""
+    <style>
+    .pulse-box {
+        animation: pulse 1.5s ease-in-out;
+        border: 2px solid #4CAF50 !important;
+        border-radius: 0.5rem;
+        padding: 0.5rem;
+    }
+    @keyframes pulse {
+        0% { box-shadow: 0 0 0px rgba(0, 255, 0, 0.5); }
+        50% { box-shadow: 0 0 20px rgba(0, 255, 0, 0.9); }
+        100% { box-shadow: 0 0 0px rgba(0, 255, 0, 0.5); }
+    }
+    </style>
+""", unsafe_allow_html=True)
+voice_options = {
+    "Grandma GG": "grandma GG",
+    "Tech Wizard": "tech wizard",
+    "Perky Sidekick": "perky sidekick",
+    "Bill the Newscaster": "bill the newscaster",
+    "Spunky Charlie": "spunky charlie",
+    "Sassy Teen": "sassy teen"
+}
+st.sidebar.header("🔊 Voice Settings")
+voice_choice = st.sidebar.selectbox("Choose a voice:", list(voice_options.keys()))
+voice = voice_options[voice_choice]
+for key in ["query", "url", "file_text"]:
+    if key not in st.session_state:
+        st.session_state[key] = ""
+if st.button("🧹 Clear All"):
+    st.session_state.query = ""
+    st.session_state.url = ""
+    st.session_state.file_text = ""
+    st.rerun()
+# Bullet-to-query safe logic BEFORE rendering input box
+for i in range(10):
+    if st.session_state.get(f"use_bullet_{i}", False):
+        st.session_state["load_bullet_text"] = st.session_state.get(f"bullet_text_{i}", "")
+        st.rerun()
+pulse = False
+if "load_bullet_text" in st.session_state:
+    st.session_state.query = st.session_state["load_bullet_text"]
+    del st.session_state["load_bullet_text"]
+    pulse = True
+# Query box with optional pulse animation
+if pulse:
+    st.markdown('<div class="pulse-box">', unsafe_allow_html=True)
+    query = st.text_area("💬 Ask or refine something based on the bullets:", key="query")
+    st.markdown("</div>", unsafe_allow_html=True)
+else:
+    query = st.text_area("💬 Ask or refine something based on the bullets:", key="query")
+url = st.text_input("🌐 Optional URL to summarize:", placeholder="https://example.com", key="url")
+uploaded_file = st.file_uploader("📎 Or upload a file (PDF, TXT, DOCX)", type=["pdf", "txt", "docx"])
+file_text = ""
+if uploaded_file is not None:
+    file_text = uploaded_file.read().decode("utf-8", errors="ignore")
+    st.session_state.file_text = file_text
+    st.success("✅ File uploaded successfully!")
+else:
+    file_text = st.session_state.get("file_text", "")
+if st.button("🎯 Summarize"):
+    with st.spinner("Generating response..."):
+        try:
+            payload = {
+                "query": query,
+                "url": url,
+                "voice": voice,
+                "file_text": file_text
+            }
+            try:
+                response = requests.post(f"{API_URL}/process", json=payload, timeout=15)
+            except requests.exceptions.RequestException as req_err:
+                st.error(f"🚨 Connection to backend failed:\n{req_err}")
+                st.stop()
+            if response.status_code == 200:
+                data = response.json()
+                if not query.strip() and data.get("key_points"):
+                    st.markdown("### 📋 Key Points from File")
+                    for i, point in enumerate(data["key_points"]):
+                        st.markdown(f"- {point}")
+                        st.session_state[f"bullet_text_{i}"] = point
+                        st.button("↪️ Use This", key=f"use_bullet_{i}")
+                else:
+                    st.markdown("### 📜 Answer")
+                    st.success(data["answer"])
+                # ✅ Audio playback with retries
+                if data.get("audio_key"):
+                    audio_key = data["audio_key"]
+                    audio_url = f"{API_URL}/get-audio/{audio_key}"
+                    for attempt in range(10):
+                        try:
+                            audio_check = requests.get(audio_url)
+                            if audio_check.status_code == 200:
+                                audio_bytes = audio_check.content
+                                if audio_bytes and len(audio_bytes) > 2000:
+                                    st.audio(audio_bytes, format="audio/mp3")
+                                    break
+                                else:
+                                    st.warning("⚠️ Audio not ready yet. Waiting...")
+                            else:
+                                st.warning("⚠️ Couldn't reach audio endpoint.")
+                        except Exception as e:
+                            st.warning(f"⚠️ Audio request failed: {e}")
+                        time.sleep(0.5)
+                    else:
+                        st.warning("⚠️ Audio not ready or empty after retries.")
+            else:
+                st.error(f"❌ Backend error: {response.status_code} - {response.text}")
+        except Exception as e:
+            st.error(f"🔥 Unexpected error:\n{e}")

generate_audio.py ADDED Viewed

	@@ -0,0 +1,32 @@

+import os
+import logging
+from elevenlabs import stream
+from elevenlabs.client import ElevenLabs
+# Directory the generated MP3 files are written to.
+AUDIO_DIR = "audio_outputs"
+logger = logging.getLogger(__name__)
+# Shared ElevenLabs client, constructed at import time. The API key is read
+# from the ELEVENLABS_API_KEY environment variable; os.getenv yields None
+# when that variable is unset.
+client = ElevenLabs(api_key=os.getenv("ELEVENLABS_API_KEY"))
+def generate_audio(text: str, voice_id: str, audio_key: str):
+    try:
+        logger.info("🎯 Starting ElevenLabs audio generation")
+        audio_stream = client.text_to_speech.convert_as_stream(
+            text=text,
+            voice_id=voice_id,
+            model_id="eleven_multilingual_v2"
+        )
+        os.makedirs(AUDIO_DIR, exist_ok=True)
+        output_path = os.path.join(AUDIO_DIR, f"{audio_key}.mp3")
+        with open(output_path, "wb") as f:
+            for chunk in audio_stream:
+                if isinstance(chunk, bytes):
+                    f.write(chunk)
+        logger.info(f"✅ Audio saved to {output_path}")
+    except Exception as e:
+        logger.error(f"🔥 Error generating audio: {e}")

main.py ADDED Viewed

	@@ -0,0 +1,131 @@

+import os
+import uuid
+import logging
+from datetime import datetime
+from pathlib import Path
+from fastapi import FastAPI, HTTPException, BackgroundTasks, Request
+from fastapi.responses import StreamingResponse
+from pydantic import BaseModel
+from utils import (
+    sanitize_url,
+    crawl_documentation,
+    get_voice_prompt_style,
+    voice_map,
+)
+from ai_agents import Runner, setup_agents
+from generate_audio import generate_audio
+app = FastAPI()
+Path("audio_outputs").mkdir(parents=True, exist_ok=True)
+AUDIO_DIR = "audio_outputs"
+logging.basicConfig(
+    filename="voice_agent.log",
+    filemode="w",
+    format="%(asctime)s | %(levelname)s | %(message)s",
+    level=logging.INFO,
+)
+logger = logging.getLogger(__name__)
+class QueryRequest(BaseModel):
+    query: str
+    url: str = None
+    voice: str = None
+    file_text: str = None
+from typing import Optional
+class QueryResponse(BaseModel):
+    answer: str
+    audio_key: Optional[str] = None
+    sources: list = []
+    key_points: list[str] = []
+@app.post("/process", response_model=QueryResponse)
+async def process_query(req: QueryRequest, background_tasks: BackgroundTasks):
+    try:
+        start = datetime.now()
+        logger.info(f"🧠 Processing query: {req.query}")
+        logger.info(f"🌐 URL: {req.url}")
+        logger.info(f"📎 File text preview: {req.file_text[:100] if req.file_text else 'None'}")
+        logger.info(f"🎙️ Voice: {req.voice}")
+        key_points = []
+        if req.file_text:
+            from ai_agents import Agent
+            extract_agent = Agent(
+                name="KeyPointAgent",
+                instructions="Extract the 5–7 most important key points from this content. Respond only as a bullet list.",
+                model="gpt-4o"
+            )
+            key_points_raw = await extract_agent.run(req.file_text)
+            key_points = [line.strip('-•* ').strip() for line in key_points_raw.splitlines() if line.strip()]
+            if not key_points:
+                logger.info('⚠️ No bullet points detected from GPT, using fallback.')
+                key_points = [key_points_raw.strip()]
+            logger.info(f'🔎 Final key points: {key_points}')
+        if req.url:
+            try:
+                content = crawl_documentation(req.url)
+                context = f"{content}\n\nNow answer the user's question: {req.query}"
+            except Exception as e:
+                logger.warning(f"⚠️ URL crawl failed: {e}")
+                context = f"Answer the following using your general knowledge:\n\n{req.query}"
+        elif req.file_text:
+            context = f"{req.file_text}\n\nNow answer the user's question: {req.query}"
+        else:
+            context = f"Answer the following using your general knowledge:\n\n{req.query}"
+        tone = get_voice_prompt_style(req.voice or "")
+        if tone:
+            context = tone + "\n\n" + context
+        processor, _ = setup_agents()
+        logger.info("🧠 Sending context to GPT")
+        answer = await Runner.run(processor, context)
+        if not answer:
+            raise HTTPException(status_code=500, detail="No GPT response.")
+        logger.info(f"✅ GPT returned: {answer[:100]}...")
+        logger.info(f"🤖 GPT answer complete. ⏱️ {datetime.now() - start}")
+        audio_key = None
+        if req.voice and req.voice in voice_map:
+            voice_id = voice_map[req.voice]
+            audio_key = str(uuid.uuid4())
+            generate_audio(answer, voice_id, audio_key)
+            logger.info(f"🎙️ Audio generation triggered for voice: {req.voice}")
+            # ✅ Check if audio file actually exists
+            output_path = os.path.join(AUDIO_DIR, f"{audio_key}.mp3")
+            if not os.path.exists(output_path) or os.path.getsize(output_path) < 1000:
+                logger.warning("🛑 Audio generation failed or file is too small.")
+                audio_key = None
+        else:
+            logger.warning("🛑 Invalid voice")
+        return QueryResponse(answer=answer, audio_key=audio_key, sources=[], key_points=key_points)
+    except Exception as e:
+        logger.error(f"🔥 Internal error: {str(e)}")
+        import traceback
+        logger.error("".join(traceback.format_exception(None, e, e.__traceback__)))
+        raise HTTPException(status_code=500, detail=str(e))
+@app.get("/get-audio/{key}")
+async def get_audio(key: str, request: Request):
+    audio_path = os.path.join(AUDIO_DIR, f"{key}.mp3")
+    if not os.path.exists(audio_path):
+        raise HTTPException(status_code=404, detail="Audio not found")
+    if request.method == "HEAD":
+        return StreamingResponse(iter([]), status_code=200)
+    def iterfile():
+        with open(audio_path, mode="rb") as file:
+            yield from file
+    return StreamingResponse(iterfile(), media_type="audio/mpeg")

requirements.txt ADDED Viewed

	@@ -0,0 +1,9 @@

+streamlit
+fastapi
+uvicorn
+qdrant-client
+openai
+fastembed
+firecrawl
+python-dotenv
+requests
+# Imported by generate_audio.py (elevenlabs) and utils.py (bs4).
+elevenlabs
+beautifulsoup4

utils.py ADDED Viewed

	@@ -0,0 +1,59 @@

+import os
+import re
+from urllib.parse import urlparse
+from bs4 import BeautifulSoup
+AUDIO_DIR = "audio_outputs"
+voice_map = {'grandma GG': 'rKVm0Cb9J2wrzmZupJea', 'tech wizard': 'ocn9CucaUfmmP6Two6Ik', 'perky sidekick': 'DWR3ijzKmphlRUhbBI7t', 'bill the newscaster': 'R1vZMopVRO75M5xBKX52', 'spunky charlie': 'q3yXDjF0aq4JCEo9u2g4', 'sassy teen': 'mBj2IDD9aXruPJHLGCAv'}
+def sanitize_url(url):
+    if not url.startswith(("http://", "https://")):
+        return "https://" + url
+    return url
+def extract_internal_links(html_content, base_url):
+    soup = BeautifulSoup(html_content, "html.parser")
+    parsed_base = urlparse(base_url)
+    base_domain = parsed_base.netloc
+    links = set()
+    for tag in soup.find_all("a", href=True):
+        href = tag["href"]
+        parsed_href = urlparse(href)
+        if parsed_href.netloc == "" or parsed_href.netloc == base_domain:
+            full_url = parsed_href.geturl()
+            if not full_url.startswith("http"):
+                full_url = f"{parsed_base.scheme}://{base_domain}{href}"
+            links.add(full_url)
+    return list(links)
+def crawl_documentation(url):
+    import requests
+    try:
+        response = requests.get(url, timeout=10)
+        response.raise_for_status()
+        return response.text
+    except Exception as e:
+        return f"Error fetching page: {e}"
+def get_voice_prompt_style(voice):
+    tone = {'grandma GG': 'dry, witty, and brutally honest — will roast you if you mess up.', 'tech wizard': 'cryptic, snarky, and a prodigy with code — speaks in digital spells.', 'perky sidekick': 'energetic, cheerful, and endlessly supportive — like a high-five machine.', 'bill the newscaster': 'polished, confident, and composed — delivers everything like breaking news.', 'spunky charlie': 'wildly curious, playful, and full of devil-may-care energy.', 'sassy teen': 'sarcastic, sharp-tongued, and too cool to care — flexes brainpower with attitude.'}
+    return tone.get(voice.lower(), "neutral")
+def save_audio_file(audio_path, content):
+    os.makedirs(AUDIO_DIR, exist_ok=True)
+    with open(audio_path, "wb") as f:
+        f.write(content)
+# Names exported by `from utils import *`: the helper functions defined in
+# this module plus the two module-level constants.
+__all__ = [
+    "sanitize_url",
+    "extract_internal_links",
+    "crawl_documentation",
+    "get_voice_prompt_style",
+    "save_audio_file",
+    "voice_map",
+    "AUDIO_DIR",
+]