Spaces:

Bonosa2
/

AI-TALKS-BACK

Sleeping

App Files Files Community

AI-TALKS-BACK / app.py

Bonosa2

Upload app.py

3819c76 verified about 2 months ago

raw

history blame contribute delete

7.99 kB

	import os
	import uuid
	import json
	import fitz
	import requests
	import streamlit as st
	from io import BytesIO
	from docx import Document
	from dotenv import load_dotenv
	from elevenlabs.client import ElevenLabs
	from utils import voice_map, get_voice_prompt_style, AUDIO_DIR
	from generate_audio import generate_audio
	from logger_setup import logger

	# Load API keys
	load_dotenv()
	OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
	ELEVENLABS_API_KEY = os.getenv("ELEVENLABS_API_KEY")
	client = ElevenLabs(api_key=ELEVENLABS_API_KEY)

	# Streamlit config
	st.set_page_config(page_title="Voice Agent Pro", page_icon="🎧")
	logger.info("🎬 Streamlit app started")

	# Inject large fonts + tips
	st.markdown("""
	<style>
	.big-title {
	font-size: 2.4em !important;
	font-weight: bold;
	color: #333333;
	text-align: center;
	}
	.big-answer {
	font-size: 1.6em;
	line-height: 1.5;
	color: #111;
	}
	textarea, input {
	font-size: 1.2em !important;
	}
	.instructions {
	font-size: 1.1em;
	padding: 0.5em;
	background-color: #f0f4ff;
	border-radius: 0.5em;
	margin-bottom: 1em;
	}
	</style>
	""", unsafe_allow_html=True)

	st.markdown('<div class="big-title">🎧 Voice Agent Pro</div>', unsafe_allow_html=True)
	st.markdown("""<div class="instructions">Ask a question <b>OR</b> paste a URL <b>OR</b> upload a file — and I'll summarize it in bullet points with expressive AI narration!</div>""", unsafe_allow_html=True)


	# Voice selection
	st.sidebar.header("🎚️ Voice Settings")
	voice_label = st.sidebar.selectbox("Choose a voice:", list(voice_map.keys()))
	voice_id = voice_map[voice_label]
	tone_prompt = get_voice_prompt_style(voice_label)
	font_size = st.sidebar.radio("Font Size", ["Normal", "Large"])
	font_class = "big-answer" if font_size == "Large" else ""

	# Add Bolt attribution to sidebar
	st.sidebar.markdown("---")
	st.sidebar.markdown("⚡ Made with [bolt.new](https://bolt.new)")


	# One-liners per voice
	preview_lines = {
	"grandma GG": "Back in my day, we didn’t need AI to sound this fabulous.",
	"tech wizard": "System online. You may now enter your query, human.",
	"perky sidekick": "You got this! Let’s answer that question together!",
	"bill the newscaster": "Breaking news — you’ve just selected the perfect voice.",
	"spunky charlie": "Whoa! Is it story time already? Let’s go!",
	"sassy teen": "Seriously? You better ask something cool."
	}

	preview_line = preview_lines.get(voice_label, "Testing voice.")
	st.markdown(f"🎧 <b>{voice_label}</b> says:", unsafe_allow_html=True)
	st.markdown(f"_{preview_line}_", unsafe_allow_html=True)

	# Stream preview audio (no autoplay)
	try:
	audio_stream = client.text_to_speech.convert(
	text=preview_line,
	voice_id=voice_id,
	model_id="eleven_multilingual_v2"
	)

	full_audio_content = b""
	for chunk in audio_stream:
	full_audio_content += chunk

	st.audio(full_audio_content)

	except Exception as e:
	st.warning("Voice preview unavailable.")
	logger.exception("🎧 Voice preview error")

	# Session state
	if "answer" not in st.session_state: st.session_state.answer = ""
	if "audio_key" not in st.session_state: st.session_state.audio_key = None
	if "file_text" not in st.session_state: st.session_state.file_text = ""
	if "key_points" not in st.session_state: st.session_state.key_points = []

	# Inputs
	query = st.text_area("🗨️ Ask your question:", value="", placeholder="Ask your question", key="query")
	url = st.text_input("🌐 Or paste a URL:")
	uploaded_file = st.file_uploader("📎 Or upload a file (PDF, TXT, DOCX)", type=["pdf", "txt", "docx"])

	# File reader
	def extract_text_from_file(file):
	file_type = file.name.split('.')[-1].lower()

	if file_type == "pdf":
	try:
	with fitz.open(stream=file.read(), filetype="pdf") as doc:
	return "\n".join(page.get_text() for page in doc)
	except Exception as e:
	logger.error(f"❌ PDF read failed: {e}")
	return "Failed to read the PDF."

	elif file_type == "txt":
	return file.read().decode("utf-8", errors="ignore")

	elif file_type == "docx":
	try:
	doc = Document(file)
	return "\n".join(p.text for p in doc.paragraphs)
	except Exception as e:
	logger.error(f"❌ DOCX read failed: {e}")
	return "Failed to read the DOCX file."

	return "Unsupported file type."

	if uploaded_file:
	st.session_state.file_text = extract_text_from_file(uploaded_file)
	logger.info(f"📄 Extracted from file: {uploaded_file.name}")

	# Clear app
	if st.button("🧹 Clear All"):
	logger.info("🧼 Reset clicked")
	st.rerun()

	# GPT streaming
	def stream_openai_response(payload, headers):
	with requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload, stream=True) as r:
	for line in r.iter_lines():
	if line and line.startswith(b"data: "):
	yield line[len(b"data: "):].decode()

	# Summarize
	if st.button("🔁 Summarize"):
	if not query and not url and not uploaded_file:
	st.warning("Please enter a question, a URL, or upload a file.")
	logger.warning("⚠️ Summarize clicked with no input")
	else:
	with st.spinner("Talking to GPT..."):
	try:
	context = ""
	if st.session_state.file_text:
	context += st.session_state.file_text + "\n\n"
	if url:
	context += f"Summarize this page: {url}\n\n"

	context += (
	"You are a voice assistant with the following tone:\n"
	f"{tone_prompt}\n\n"
	)

	if query.strip():
	context += f"Now answer this in bullet points:\n{query}"
	else:
	context += "Summarize the content above in bullet points."

	headers = {"Authorization": f"Bearer {OPENAI_API_KEY}"}
	payload = {
	"model": "gpt-4o",
	"messages": [{"role": "user", "content": context}],
	"temperature": 0.7,
	"stream": True
	}

	st.session_state.answer = ""
	answer_box = st.empty()
	logger.info("🧠 GPT stream started")

	for chunk in stream_openai_response(payload, headers):
	if chunk.strip() == "[DONE]":
	logger.info("🟢 GPT done")
	continue
	try:
	parsed = json.loads(chunk)
	delta = parsed['choices'][0]['delta'].get('content', '')
	st.session_state.answer += delta
	answer_box.markdown(f'<div class="{font_class}">{st.session_state.answer}</div>', unsafe_allow_html=True)
	except json.JSONDecodeError:
	logger.warning(f"⚠️ Non-JSON chunk skipped: {chunk}")
	continue

	audio_key = str(uuid.uuid4())
	generate_audio(st.session_state.answer, voice_id, audio_key)
	st.session_state.audio_key = audio_key
	logger.info(f"🎧 Audio ready: {audio_key}")

	except Exception as e:
	st.error(f"🔥 Error: {e}")
	logger.exception("🔥 GPT/audio failed")

	# Output
	if st.session_state.answer:
	st.subheader("📜 Answer")
	st.success(st.session_state.answer)

	if st.session_state.audio_key:
	audio_path = os.path.join(AUDIO_DIR, f"{st.session_state.audio_key}.mp3")
	if os.path.exists(audio_path):
	st.audio(audio_path)
	else:
	st.error("❗ Audio file missing.")
	logger.warning(f"❌ Missing audio file: {audio_path}")