"""Voice Agent Pro — Streamlit app.

Takes a question, a URL, or an uploaded file (PDF/TXT/DOCX), asks GPT-4o for a
bullet-point summary, and narrates the answer with an ElevenLabs voice.
"""

import os
import uuid
import json

import fitz  # PyMuPDF
import requests
import streamlit as st
from io import BytesIO
from docx import Document
from dotenv import load_dotenv
from elevenlabs.client import ElevenLabs

from utils import voice_map, get_voice_prompt_style, AUDIO_DIR
from generate_audio import generate_audio
from logger_setup import logger

# Load API keys
load_dotenv()
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
ELEVENLABS_API_KEY = os.getenv("ELEVENLABS_API_KEY")
client = ElevenLabs(api_key=ELEVENLABS_API_KEY)

# Streamlit config
st.set_page_config(page_title="Voice Agent Pro", page_icon="🎧")
logger.info("🎬 Streamlit app started")

# Inject large fonts + tips.
# NOTE(review): the original CSS payload was lost in extraction; this restores
# the minimal .big-answer rule that the font-size toggle below depends on —
# confirm against the intended design.
st.markdown(
    """
    <style>
    .big-answer { font-size: 1.4rem; line-height: 1.6; }
    </style>
    """,
    unsafe_allow_html=True,
)

st.markdown(
    '<div style="font-size:2.2rem;font-weight:700;">🎧 Voice Agent Pro</div>',
    unsafe_allow_html=True,
)
st.markdown(
    "<div>Ask a question OR paste a URL OR upload a file — and I'll summarize "
    "it in bullet points with expressive AI narration!</div>",
    unsafe_allow_html=True,
)

# Voice selection
st.sidebar.header("🎚️ Voice Settings")
voice_label = st.sidebar.selectbox("Choose a voice:", list(voice_map.keys()))
voice_id = voice_map[voice_label]
tone_prompt = get_voice_prompt_style(voice_label)
font_size = st.sidebar.radio("Font Size", ["Normal", "Large"])
font_class = "big-answer" if font_size == "Large" else ""

# Add Bolt attribution to sidebar
st.sidebar.markdown("---")
st.sidebar.markdown("⚡ Made with [bolt.new](https://bolt.new)")

# One-liner preview per voice so the user can audition the selection.
preview_lines = {
    "grandma GG": "Back in my day, we didn't need AI to sound this fabulous.",
    "tech wizard": "System online. You may now enter your query, human.",
    "perky sidekick": "You got this! Let's answer that question together!",
    "bill the newscaster": "Breaking news — you've just selected the perfect voice.",
    "spunky charlie": "Whoa! Is it story time already? Let's go!",
    "sassy teen": "Seriously? You better ask something cool.",
}
preview_line = preview_lines.get(voice_label, "Testing voice.")
st.markdown(f"🎧 {voice_label} says:", unsafe_allow_html=True)
st.markdown(f"_{preview_line}_", unsafe_allow_html=True)

# Stream preview audio (no autoplay). The SDK returns an iterator of MP3
# chunks; join them once instead of quadratic bytes += concatenation.
try:
    audio_stream = client.text_to_speech.convert(
        text=preview_line,
        voice_id=voice_id,
        model_id="eleven_multilingual_v2",
    )
    full_audio_content = b"".join(audio_stream)
    st.audio(full_audio_content)
except Exception:
    # Best-effort preview: a failure here should not break the app.
    st.warning("Voice preview unavailable.")
    logger.exception("🎧 Voice preview error")

# Session state defaults (survive reruns; widgets manage their own keys).
_SESSION_DEFAULTS = (("answer", ""), ("audio_key", None), ("file_text", ""), ("key_points", []))
for _key, _default in _SESSION_DEFAULTS:
    if _key not in st.session_state:
        st.session_state[_key] = _default

# Inputs
query = st.text_area("🗨️ Ask your question:", value="", placeholder="Ask your question", key="query")
url = st.text_input("🌐 Or paste a URL:")
uploaded_file = st.file_uploader("📎 Or upload a file (PDF, TXT, DOCX)", type=["pdf", "txt", "docx"])


# File reader
def extract_text_from_file(file):
    """Return plain text extracted from an uploaded PDF/TXT/DOCX file.

    On a read failure the error is logged and a human-readable message is
    returned instead of raising, so the UI flow continues (the message then
    becomes the "content" that gets summarized — deliberate best-effort).
    """
    file_type = file.name.split(".")[-1].lower()
    if file_type == "pdf":
        try:
            with fitz.open(stream=file.read(), filetype="pdf") as doc:
                return "\n".join(page.get_text() for page in doc)
        except Exception as e:
            logger.error(f"❌ PDF read failed: {e}")
            return "Failed to read the PDF."
    elif file_type == "txt":
        return file.read().decode("utf-8", errors="ignore")
    elif file_type == "docx":
        try:
            doc = Document(file)
            return "\n".join(p.text for p in doc.paragraphs)
        except Exception as e:
            logger.error(f"❌ DOCX read failed: {e}")
            return "Failed to read the DOCX file."
    return "Unsupported file type."


if uploaded_file:
    st.session_state.file_text = extract_text_from_file(uploaded_file)
    logger.info(f"📄 Extracted from file: {uploaded_file.name}")

# Clear app.
# BUG FIX: the original only called st.rerun(), which leaves all session state
# (previous answer, audio key, extracted file text) intact — so nothing was
# actually cleared. Reset the state before rerunning.
if st.button("🧹 Clear All"):
    logger.info("🧼 Reset clicked")
    st.session_state.answer = ""
    st.session_state.audio_key = None
    st.session_state.file_text = ""
    st.session_state.key_points = []
    st.rerun()


# GPT streaming
def stream_openai_response(payload, headers):
    """Yield decoded SSE data payloads from a streaming chat-completions call.

    Each yielded string is the text after the ``data: `` prefix — either a
    JSON chunk or the literal ``[DONE]`` sentinel.
    """
    with requests.post(
        "https://api.openai.com/v1/chat/completions",
        headers=headers,
        json=payload,
        stream=True,
        timeout=60,  # fail fast instead of hanging the UI on a dead connection
    ) as r:
        # Surface auth/quota/4xx errors instead of silently yielding nothing.
        r.raise_for_status()
        for line in r.iter_lines():
            if line and line.startswith(b"data: "):
                yield line[len(b"data: "):].decode()


# Summarize
if st.button("🔍 Summarize"):
    if not query and not url and not uploaded_file:
        st.warning("Please enter a question, a URL, or upload a file.")
        logger.warning("⚠️ Summarize clicked with no input")
    else:
        with st.spinner("Talking to GPT..."):
            try:
                # Build the prompt: file text and/or URL first, then tone, then task.
                context = ""
                if st.session_state.file_text:
                    context += st.session_state.file_text + "\n\n"
                if url:
                    # NOTE(review): the model cannot fetch URLs itself — this
                    # only mentions the URL in the prompt. Consider fetching
                    # the page content here instead.
                    context += f"Summarize this page: {url}\n\n"
                context += (
                    "You are a voice assistant with the following tone:\n"
                    f"{tone_prompt}\n\n"
                )
                if query.strip():
                    context += f"Now answer this in bullet points:\n{query}"
                else:
                    context += "Summarize the content above in bullet points."

                headers = {"Authorization": f"Bearer {OPENAI_API_KEY}"}
                payload = {
                    "model": "gpt-4o",
                    "messages": [{"role": "user", "content": context}],
                    "temperature": 0.7,
                    "stream": True,
                }

                st.session_state.answer = ""
                answer_box = st.empty()
                logger.info("🧠 GPT stream started")
                for chunk in stream_openai_response(payload, headers):
                    if chunk.strip() == "[DONE]":
                        logger.info("🟢 GPT done")
                        continue
                    try:
                        parsed = json.loads(chunk)
                        # Guard: some stream events carry no choices; the
                        # original unguarded ['choices'][0] aborted the stream.
                        choices = parsed.get("choices") or []
                        if not choices:
                            continue
                        delta = choices[0].get("delta", {}).get("content", "") or ""
                        st.session_state.answer += delta
                        # NOTE(review): original wrapper markup was lost;
                        # restored as a div using the font-size class computed
                        # in the sidebar — confirm against intended layout.
                        answer_box.markdown(
                            f'<div class="{font_class}">{st.session_state.answer}</div>',
                            unsafe_allow_html=True,
                        )
                    except json.JSONDecodeError:
                        logger.warning(f"⚠️ Non-JSON chunk skipped: {chunk}")
                        continue

                # Narrate the finished answer and remember the file key.
                audio_key = str(uuid.uuid4())
                generate_audio(st.session_state.answer, voice_id, audio_key)
                st.session_state.audio_key = audio_key
                logger.info(f"🎧 Audio ready: {audio_key}")
            except Exception as e:
                st.error(f"🔥 Error: {e}")
                logger.exception("🔥 GPT/audio failed")

# Output
if st.session_state.answer:
    st.subheader("📜 Answer")
    st.success(st.session_state.answer)
    if st.session_state.audio_key:
        audio_path = os.path.join(AUDIO_DIR, f"{st.session_state.audio_key}.mp3")
        if os.path.exists(audio_path):
            st.audio(audio_path)
        else:
            st.error("❗ Audio file missing.")
            logger.warning(f"❌ Missing audio file: {audio_path}")