# AI-TALKS-BACK / app.py
# Bonosa2's picture
# Upload app.py
# 3819c76 verified
import os
import uuid
import json
import fitz
import requests
import streamlit as st
from io import BytesIO
from docx import Document
from dotenv import load_dotenv
from elevenlabs.client import ElevenLabs
from utils import voice_map, get_voice_prompt_style, AUDIO_DIR
from generate_audio import generate_audio
from logger_setup import logger
# Load API keys. load_dotenv() runs first so that the os.getenv() lookups
# below can see any values it loads.
load_dotenv()
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
ELEVENLABS_API_KEY = os.getenv("ELEVENLABS_API_KEY")

# Report missing credentials in the log up front; without this they only
# show up later as failed OpenAI / ElevenLabs requests.
for _key_name, _key_value in (
    ("OPENAI_API_KEY", OPENAI_API_KEY),
    ("ELEVENLABS_API_KEY", ELEVENLABS_API_KEY),
):
    if not _key_value:
        logger.warning(f"{_key_name} is not set; requests that need it will fail")

# Shared ElevenLabs client used for the voice preview further down.
client = ElevenLabs(api_key=ELEVENLABS_API_KEY)

# Streamlit config
st.set_page_config(page_title="Voice Agent Pro", page_icon="🎧")
logger.info("🎬 Streamlit app started")
# Inject large fonts + tips.
# The CSS defines the classes used below and by the answer display:
# .big-title (page heading), .big-answer (large answer text) and
# .instructions (the hint box under the title).
st.markdown("""
<style>
.big-title {
font-size: 2.4em !important;
font-weight: bold;
color: #333333;
text-align: center;
}
.big-answer {
font-size: 1.6em;
line-height: 1.5;
color: #111;
}
textarea, input {
font-size: 1.2em !important;
}
.instructions {
font-size: 1.1em;
padding: 0.5em;
background-color: #f0f4ff;
border-radius: 0.5em;
margin-bottom: 1em;
}
</style>
""", unsafe_allow_html=True)

# Page heading and usage hint; both are raw HTML so they can use the classes above.
st.markdown('<div class="big-title">🎧 Voice Agent Pro</div>', unsafe_allow_html=True)
# The dash below was mis-encoded ("β€”") in the previous revision; restored to an em dash.
st.markdown(
    """<div class="instructions">Ask a question <b>OR</b> paste a URL <b>OR</b> upload a file — and I'll summarize it in bullet points with expressive AI narration!</div>""",
    unsafe_allow_html=True,
)
# Voice selection: the sidebar picks a label from voice_map, which is then
# resolved to the voice id and to the tone prompt for that label.
st.sidebar.header("🎚️ Voice Settings")
voice_label = st.sidebar.selectbox("Choose a voice:", list(voice_map.keys()))
voice_id = voice_map[voice_label]
tone_prompt = get_voice_prompt_style(voice_label)

# "Large" switches the streamed answer to the .big-answer CSS class;
# "Normal" applies no extra class.
font_size = st.sidebar.radio("Font Size", ["Normal", "Large"])
font_class = "big-answer" if font_size == "Large" else ""

# Add Bolt attribution to sidebar.
# The leading emoji was mis-encoded ("⚑") in the previous revision; restored.
st.sidebar.markdown("---")
st.sidebar.markdown("⚡ Made with [bolt.new](https://bolt.new)")
# One-liners per voice, keyed by the same labels offered in the sidebar.
# The selected line is shown on the page and is also the text passed to the
# voice preview, so it must be clean text: the dash in the newscaster line
# was mis-encoded ("β€”") in the previous revision and is restored here.
preview_lines = {
    "grandma GG": "Back in my day, we didn’t need AI to sound this fabulous.",
    "tech wizard": "System online. You may now enter your query, human.",
    "perky sidekick": "You got this! Let’s answer that question together!",
    "bill the newscaster": "Breaking news — you’ve just selected the perfect voice.",
    "spunky charlie": "Whoa! Is it story time already? Let’s go!",
    "sassy teen": "Seriously? You better ask something cool.",
}

# Fall back to a neutral line for any voice label without a custom one-liner.
preview_line = preview_lines.get(voice_label, "Testing voice.")
st.markdown(f"🎧 <b>{voice_label}</b> says:", unsafe_allow_html=True)
st.markdown(f"_{preview_line}_", unsafe_allow_html=True)
# Stream preview audio (no autoplay)
@st.cache_data(show_spinner=False)
def _preview_audio(text, voice):
    """Return the preview clip for *text* spoken by *voice* as raw bytes.

    The result is cached per (text, voice) pair, so script reruns triggered
    by unrelated widget interaction do not call the text-to-speech API
    again. If the call raises, the exception propagates to the caller.

    Args:
        text: The sentence to synthesize.
        voice: The ElevenLabs voice id to use.

    Returns:
        The concatenated audio chunks yielded by the text-to-speech call.
    """
    chunks = client.text_to_speech.convert(
        text=text,
        voice_id=voice,
        model_id="eleven_multilingual_v2",
    )
    # Join once instead of growing a bytes object chunk by chunk.
    return b"".join(chunks)


try:
    st.audio(_preview_audio(preview_line, voice_id))
except Exception:
    # The preview is best-effort: the rest of the page must still render.
    st.warning("Voice preview unavailable.")
    logger.exception("🎧 Voice preview error")
# Session state: seed each key the app relies on, leaving existing values
# from earlier reruns untouched.
_session_defaults = {
    "answer": "",
    "audio_key": None,
    "file_text": "",
    "key_points": [],
}
for _state_key, _state_default in _session_defaults.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _state_default
# Inputs: a free-text question, a URL, and/or an uploaded document.
# The emoji in the question and uploader labels were mis-encoded
# ("πŸ—¨οΈ", "πŸ“Ž") in the previous revision and are restored here.
query = st.text_area(
    "🗨️ Ask your question:",
    value="",
    placeholder="Ask your question",
    key="query",
)
url = st.text_input("🌐 Or paste a URL:")
uploaded_file = st.file_uploader(
    "📎 Or upload a file (PDF, TXT, DOCX)",
    type=["pdf", "txt", "docx"],
)
# File reader
def extract_text_from_file(file):
    """Return the plain text of an uploaded PDF, TXT, or DOCX file.

    The format is chosen from the extension of ``file.name``
    (case-insensitive), not from the file's content.

    Args:
        file: An object exposing a ``name`` attribute and a ``read()``
            method returning bytes (the DOCX branch passes the object
            itself to ``Document``).

    Returns:
        The extracted text. When a PDF or DOCX cannot be parsed, or the
        extension is not one of the three supported ones, a short
        human-readable message is returned instead of raising.
    """
    extension = file.name.rsplit(".", 1)[-1].lower()

    if extension == "pdf":
        try:
            with fitz.open(stream=file.read(), filetype="pdf") as pdf:
                return "\n".join(page.get_text() for page in pdf)
        except Exception:
            # logger.exception keeps the traceback, matching the other
            # error handlers in this file.
            logger.exception("❌ PDF read failed")
            return "Failed to read the PDF."

    if extension == "txt":
        # Undecodable bytes are dropped rather than failing the upload.
        return file.read().decode("utf-8", errors="ignore")

    if extension == "docx":
        try:
            document = Document(file)
            return "\n".join(paragraph.text for paragraph in document.paragraphs)
        except Exception:
            logger.exception("❌ DOCX read failed")
            return "Failed to read the DOCX file."

    return "Unsupported file type."
# Keep file_text in step with the uploader: extract when a file is present,
# and clear it when the file has been removed so that text from an earlier
# upload is not silently reused as context.
if uploaded_file:
    st.session_state.file_text = extract_text_from_file(uploaded_file)
    logger.info(f"📄 Extracted from file: {uploaded_file.name}")
else:
    st.session_state.file_text = ""
# Clear app
if st.button("🧹 Clear All"):
    logger.info("🧼 Reset clicked")
    # st.rerun() alone keeps session state, so the previous answer and audio
    # would still be displayed. Reset the app-owned keys before rerunning.
    st.session_state.answer = ""
    st.session_state.audio_key = None
    st.session_state.file_text = ""
    st.session_state.key_points = []
    # NOTE(review): the question, URL and uploader widgets are not reset
    # here; confirm whether they should be (e.g. via an on_click callback).
    st.rerun()
# GPT streaming
def stream_openai_response(payload, headers, timeout=(10, 120)):
    """Yield the ``data:`` payloads of a streamed chat-completions response.

    Args:
        payload: JSON-serialisable request body; the caller sets
            ``"stream": True`` in it.
        headers: HTTP headers, including the ``Authorization`` bearer token.
        timeout: ``(connect, read)`` timeout in seconds passed to
            ``requests.post``. With a streamed response the read timeout
            bounds the wait for the next chunk, not the whole answer.

    Yields:
        Each server-sent ``data: `` line with the prefix removed, decoded to
        ``str``. This includes the final ``[DONE]`` marker; the caller
        decides how to handle it.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
            Without this check an auth or quota error produces no ``data:``
            lines at all and looks like an empty answer.
        requests.RequestException: For connection errors and timeouts.
    """
    prefix = b"data: "
    with requests.post(
        "https://api.openai.com/v1/chat/completions",
        headers=headers,
        json=payload,
        stream=True,
        timeout=timeout,
    ) as response:
        response.raise_for_status()
        for line in response.iter_lines():
            # Blank keep-alive lines and non-data fields are skipped.
            if line and line.startswith(prefix):
                yield line[len(prefix):].decode()
# Summarize: build a prompt from the inputs, stream the GPT answer into the
# page, then synthesize the finished answer to audio.
# NOTE(review): the button emoji had lost a byte in the previous revision
# ("πŸ”"); 🔍 is the most plausible original — confirm.
if st.button("🔍 Summarize"):
    question = query.strip()
    page_url = url.strip()

    # Whitespace-only input counts as empty, matching the strip() used when
    # the prompt is built below.
    if not question and not page_url and not uploaded_file:
        st.warning("Please enter a question, a URL, or upload a file.")
        logger.warning("⚠️ Summarize clicked with no input")
    else:
        with st.spinner("Talking to GPT..."):
            try:
                context = ""
                # Only use extracted text while a file is actually attached,
                # consistent with the validation above.
                if uploaded_file and st.session_state.file_text:
                    context += st.session_state.file_text + "\n\n"
                if page_url:
                    context += f"Summarize this page: {page_url}\n\n"
                context += (
                    "You are a voice assistant with the following tone:\n"
                    f"{tone_prompt}\n\n"
                )
                if question:
                    context += f"Now answer this in bullet points:\n{question}"
                else:
                    context += "Summarize the content above in bullet points."

                headers = {"Authorization": f"Bearer {OPENAI_API_KEY}"}
                payload = {
                    "model": "gpt-4o",
                    "messages": [{"role": "user", "content": context}],
                    "temperature": 0.7,
                    "stream": True,
                }

                # Drop the previous result up front so a failure part-way
                # through cannot leave old audio paired with a new answer.
                st.session_state.answer = ""
                st.session_state.audio_key = None
                answer_box = st.empty()
                logger.info("🧠 GPT stream started")

                for chunk in stream_openai_response(payload, headers):
                    if chunk.strip() == "[DONE]":
                        logger.info("🟢 GPT done")
                        continue
                    try:
                        parsed = json.loads(chunk)
                    except json.JSONDecodeError:
                        logger.warning(f"⚠️ Non-JSON chunk skipped: {chunk}")
                        continue

                    # A chunk may carry no choices, or a delta whose content
                    # is missing or null; none of those add text.
                    choices = parsed.get("choices") or []
                    if not choices:
                        continue
                    delta = (choices[0].get("delta") or {}).get("content") or ""
                    if not delta:
                        continue

                    st.session_state.answer += delta
                    answer_box.markdown(
                        f'<div class="{font_class}">{st.session_state.answer}</div>',
                        unsafe_allow_html=True,
                    )

                if st.session_state.answer.strip():
                    audio_key = str(uuid.uuid4())
                    generate_audio(st.session_state.answer, voice_id, audio_key)
                    st.session_state.audio_key = audio_key
                    logger.info(f"🎧 Audio ready: {audio_key}")
                else:
                    # Nothing to narrate; skip the text-to-speech call.
                    st.warning("GPT returned an empty answer, so no audio was generated.")
                    logger.warning("⚠️ GPT returned an empty answer")
            except Exception as e:
                # Top-level boundary for this action: show the error to the
                # user and keep the traceback in the log.
                st.error(f"🔥 Error: {e}")
                logger.exception("🔥 GPT/audio failed")
# Output: show the stored answer and, when audio was generated for it, the
# matching MP3 from AUDIO_DIR. Reading from session state keeps the result
# on screen across reruns.
# The emoji in the strings below were mis-encoded ("πŸ“œ", "❗", "❌")
# in the previous revision and are restored here.
if st.session_state.answer:
    st.subheader("📜 Answer")
    st.success(st.session_state.answer)
    if st.session_state.audio_key:
        audio_path = os.path.join(AUDIO_DIR, f"{st.session_state.audio_key}.mp3")
        if os.path.exists(audio_path):
            st.audio(audio_path)
        else:
            # The key is set but the file is not on disk.
            st.error("❗ Audio file missing.")
            logger.warning(f"❌ Missing audio file: {audio_path}")