Spaces:
Running
Running
Upload app.py
Browse files
app.py
CHANGED
@@ -1,9 +1,13 @@
|
|
1 |
import os
|
2 |
import uuid
|
3 |
import json
|
|
|
4 |
import requests
|
5 |
import streamlit as st
|
|
|
|
|
6 |
from dotenv import load_dotenv
|
|
|
7 |
from utils import voice_map, get_voice_prompt_style, AUDIO_DIR
|
8 |
from generate_audio import generate_audio
|
9 |
from logger_setup import logger
|
@@ -11,60 +15,153 @@ from logger_setup import logger
|
|
11 |
# Load API keys
|
12 |
load_dotenv()
|
13 |
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
|
|
|
|
|
14 |
|
15 |
# Streamlit config
|
16 |
-
st.set_page_config(page_title="Voice Agent Pro", page_icon="
|
17 |
-
st.title("ποΈ Voice Agent Pro")
|
18 |
-
st.markdown("Summarized answers with expressive AI voices.")
|
19 |
logger.info("π¬ Streamlit app started")
|
20 |
|
21 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
st.sidebar.header("ποΈ Voice Settings")
|
23 |
voice_label = st.sidebar.selectbox("Choose a voice:", list(voice_map.keys()))
|
24 |
voice_id = voice_map[voice_label]
|
25 |
tone_prompt = get_voice_prompt_style(voice_label)
|
26 |
-
|
27 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
28 |
if "answer" not in st.session_state: st.session_state.answer = ""
|
29 |
if "audio_key" not in st.session_state: st.session_state.audio_key = None
|
30 |
if "file_text" not in st.session_state: st.session_state.file_text = ""
|
31 |
if "key_points" not in st.session_state: st.session_state.key_points = []
|
32 |
|
33 |
# Inputs
|
34 |
-
query = st.text_area("π¨οΈ Ask
|
35 |
-
url = st.text_input("π
|
36 |
uploaded_file = st.file_uploader("π Or upload a file (PDF, TXT, DOCX)", type=["pdf", "txt", "docx"])
|
37 |
|
38 |
-
#
|
39 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
40 |
if st.button("π§Ή Clear All"):
|
41 |
-
logger.info("π§Ό
|
42 |
st.rerun()
|
43 |
|
44 |
-
|
45 |
-
# Helper: GPT streaming
|
46 |
def stream_openai_response(payload, headers, timeout=(10, 120)):
    """Yield the raw ``data:`` payloads of a streamed chat-completions call.

    Args:
        payload: JSON-serialisable request body sent to the endpoint.
        headers: HTTP headers for the request (carries the Authorization
            header built by the caller).
        timeout: ``(connect, read)`` timeout in seconds handed to
            ``requests.post`` so a stalled connection cannot block forever.

    Yields:
        str: The text following each ``data: `` prefix, decoded as UTF-8.
        The terminating ``[DONE]`` marker is yielded like any other payload;
        the caller is responsible for recognising it.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeouts.
    """
    prefix = b"data: "
    with requests.post(
        "https://api.openai.com/v1/chat/completions",
        headers=headers,
        json=payload,
        stream=True,
        timeout=timeout,
    ) as r:
        # An error response is a plain JSON body with no "data: " lines, so
        # without this check the generator would end silently and the caller
        # would carry on with an empty answer.
        r.raise_for_status()
        for line in r.iter_lines():
            if line and line.startswith(prefix):
                yield line[len(prefix):].decode("utf-8")
|
51 |
|
52 |
-
#
|
53 |
if st.button("π Summarize"):
|
54 |
if not query and not url and not uploaded_file:
|
55 |
st.warning("Please enter a question, a URL, or upload a file.")
|
|
|
56 |
else:
|
57 |
with st.spinner("Talking to GPT..."):
|
58 |
try:
|
59 |
-
if uploaded_file:
|
60 |
-
st.session_state.file_text = uploaded_file.read().decode("utf-8")
|
61 |
-
|
62 |
context = ""
|
63 |
if st.session_state.file_text:
|
64 |
context += st.session_state.file_text + "\n\n"
|
65 |
if url:
|
66 |
context += f"Summarize this page: {url}\n\n"
|
67 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
68 |
|
69 |
headers = {"Authorization": f"Bearer {OPENAI_API_KEY}"}
|
70 |
payload = {
|
@@ -76,40 +173,41 @@ if st.button("π Summarize"):
|
|
76 |
|
77 |
st.session_state.answer = ""
|
78 |
answer_box = st.empty()
|
79 |
-
logger.info("π§
|
80 |
|
81 |
for chunk in stream_openai_response(payload, headers):
|
82 |
if chunk.strip() == "[DONE]":
|
83 |
-
logger.info("π’ GPT
|
84 |
continue
|
85 |
-
|
86 |
try:
|
87 |
parsed = json.loads(chunk)
|
88 |
delta = parsed['choices'][0]['delta'].get('content', '')
|
89 |
st.session_state.answer += delta
|
90 |
-
answer_box.markdown(st.session_state.answer)
|
91 |
-
except json.JSONDecodeError
|
92 |
-
logger.warning(f"β οΈ
|
93 |
continue
|
94 |
|
95 |
-
|
96 |
-
logger.info("π§ GPT response complete. Now generating audio...")
|
97 |
-
|
98 |
audio_key = str(uuid.uuid4())
|
99 |
generate_audio(st.session_state.answer, voice_id, audio_key)
|
100 |
st.session_state.audio_key = audio_key
|
|
|
101 |
|
102 |
except Exception as e:
|
103 |
st.error(f"π₯ Error: {e}")
|
104 |
-
logger.exception("π₯
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
105 |
|
106 |
-
# Final display
|
107 |
-
if st.session_state.answer:
|
108 |
-
st.subheader("π Answer")
|
109 |
-
st.success(st.session_state.answer)
|
110 |
if st.session_state.audio_key:
|
111 |
audio_path = os.path.join(AUDIO_DIR, f"{st.session_state.audio_key}.mp3")
|
112 |
if os.path.exists(audio_path):
|
113 |
st.audio(audio_path)
|
114 |
else:
|
115 |
-
st.error("β Audio file missing.
|
|
|
|
1 |
import os
|
2 |
import uuid
|
3 |
import json
|
4 |
+
import fitz
|
5 |
import requests
|
6 |
import streamlit as st
|
7 |
+
from io import BytesIO
|
8 |
+
from docx import Document
|
9 |
from dotenv import load_dotenv
|
10 |
+
from elevenlabs.client import ElevenLabs
|
11 |
from utils import voice_map, get_voice_prompt_style, AUDIO_DIR
|
12 |
from generate_audio import generate_audio
|
13 |
from logger_setup import logger
|
|
|
15 |
# Load API keys
|
16 |
load_dotenv()
|
17 |
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
|
18 |
+
ELEVENLABS_API_KEY = os.getenv("ELEVENLABS_API_KEY")
|
19 |
+
client = ElevenLabs(api_key=ELEVENLABS_API_KEY)
|
20 |
|
21 |
# Streamlit config
|
22 |
+
st.set_page_config(page_title="Voice Agent Pro", page_icon="π§")
|
|
|
|
|
23 |
logger.info("π¬ Streamlit app started")
|
24 |
|
25 |
+
# Inject large fonts + tips
|
26 |
+
st.markdown("""
|
27 |
+
<style>
|
28 |
+
.big-title {
|
29 |
+
font-size: 2.4em !important;
|
30 |
+
font-weight: bold;
|
31 |
+
color: #333333;
|
32 |
+
text-align: center;
|
33 |
+
}
|
34 |
+
.big-answer {
|
35 |
+
font-size: 1.6em;
|
36 |
+
line-height: 1.5;
|
37 |
+
color: #111;
|
38 |
+
}
|
39 |
+
textarea, input {
|
40 |
+
font-size: 1.2em !important;
|
41 |
+
}
|
42 |
+
.instructions {
|
43 |
+
font-size: 1.1em;
|
44 |
+
padding: 0.5em;
|
45 |
+
background-color: #f0f4ff;
|
46 |
+
border-radius: 0.5em;
|
47 |
+
margin-bottom: 1em;
|
48 |
+
}
|
49 |
+
</style>
|
50 |
+
""", unsafe_allow_html=True)
|
51 |
+
|
52 |
+
st.markdown('<div class="big-title">π§ Voice Agent Pro</div>', unsafe_allow_html=True)
|
53 |
+
st.markdown('<div class="instructions">Ask a question <b>OR</b> paste a URL <b>OR</b> upload a file β and Iβll summarize it in bullet points with expressive AI narration!</div>', unsafe_allow_html=True)
|
54 |
+
|
55 |
+
# Voice selection
|
56 |
st.sidebar.header("ποΈ Voice Settings")
|
57 |
voice_label = st.sidebar.selectbox("Choose a voice:", list(voice_map.keys()))
|
58 |
voice_id = voice_map[voice_label]
|
59 |
tone_prompt = get_voice_prompt_style(voice_label)
|
60 |
+
font_size = st.sidebar.radio("Font Size", ["Normal", "Large"])
|
61 |
+
font_class = "big-answer" if font_size == "Large" else ""
|
62 |
+
|
63 |
+
# One-liners per voice
|
64 |
+
preview_lines = {
|
65 |
+
"grandma GG": "Back in my day, we didnβt need AI to sound this fabulous.",
|
66 |
+
"tech wizard": "System online. You may now enter your query, human.",
|
67 |
+
"perky sidekick": "You got this! Letβs answer that question together!",
|
68 |
+
"bill the newscaster": "Breaking news β youβve just selected the perfect voice.",
|
69 |
+
"spunky charlie": "Whoa! Is it story time already? Letβs go!",
|
70 |
+
"sassy teen": "Seriously? You better ask something cool."
|
71 |
+
}
|
72 |
+
|
73 |
+
preview_line = preview_lines.get(voice_label, "Testing voice.")
|
74 |
+
st.markdown(f"π§ <b>{voice_label}</b> says:", unsafe_allow_html=True)
|
75 |
+
st.markdown(f"_β{preview_line}β_", unsafe_allow_html=True)
|
76 |
+
|
77 |
+
# Stream preview audio (no autoplay)
|
78 |
+
@st.cache_data(show_spinner=False)
def _synthesize_preview(preview_voice_id, text):
    """Return the preview clip for one (voice, text) pair as raw audio bytes.

    The result is cached by Streamlit on its arguments, so the text-to-speech
    request is only sent the first time a given voice/line combination is
    shown instead of on every script rerun.

    Args:
        preview_voice_id: Voice identifier passed to the text-to-speech call.
        text: The sentence to synthesise.

    Returns:
        bytes: The concatenated byte chunks produced by the stream.
    """
    audio = BytesIO()
    for chunk in client.text_to_speech.convert_as_stream(
        text=text,
        voice_id=preview_voice_id,
        model_id="eleven_multilingual_v2",
    ):
        # Only byte chunks are audio data; anything else is skipped.
        if isinstance(chunk, bytes):
            audio.write(chunk)
    return audio.getvalue()


try:
    st.audio(_synthesize_preview(voice_id, preview_line))
except Exception:
    # Best effort: the preview is optional, so a failure must not stop the app.
    st.warning("Voice preview unavailable.")
    logger.exception("π§ Voice preview error")
|
92 |
+
|
93 |
+
# Session state
|
94 |
if "answer" not in st.session_state: st.session_state.answer = ""
|
95 |
if "audio_key" not in st.session_state: st.session_state.audio_key = None
|
96 |
if "file_text" not in st.session_state: st.session_state.file_text = ""
|
97 |
if "key_points" not in st.session_state: st.session_state.key_points = []
|
98 |
|
99 |
# Inputs
|
100 |
+
query = st.text_area("π¨οΈ Ask your question:", value="", placeholder="Ask your question", key="query")
|
101 |
+
url = st.text_input("π Or paste a URL:")
|
102 |
uploaded_file = st.file_uploader("π Or upload a file (PDF, TXT, DOCX)", type=["pdf", "txt", "docx"])
|
103 |
|
104 |
+
# File reader
|
105 |
+
def extract_text_from_file(file):
    """Return the plain text contained in an uploaded PDF, TXT, or DOCX file.

    The format is chosen from the extension of ``file.name`` (case-insensitive).

    Args:
        file: Binary file-like object exposing ``name`` and ``read()`` (and
            normally ``seek()``), such as the value returned by
            ``st.file_uploader``.

    Returns:
        str: The extracted text. Failures are not raised; a short message is
        returned instead ("Failed to read the PDF.", "Failed to read the DOCX
        file.", or "Unsupported file type.").
    """
    # The stream may already have been consumed by an earlier read; rewind so
    # the whole document is read rather than an empty tail.
    rewind = getattr(file, "seek", None)
    if callable(rewind):
        rewind(0)

    file_type = file.name.split('.')[-1].lower()

    if file_type == "pdf":
        try:
            with fitz.open(stream=file.read(), filetype="pdf") as doc:
                return "\n".join(page.get_text() for page in doc)
        except Exception as e:
            logger.error(f"β PDF read failed: {e}")
            return "Failed to read the PDF."

    if file_type == "txt":
        # Undecodable bytes are dropped rather than aborting the upload.
        return file.read().decode("utf-8", errors="ignore")

    if file_type == "docx":
        try:
            doc = Document(file)
            return "\n".join(p.text for p in doc.paragraphs)
        except Exception as e:
            logger.error(f"β DOCX read failed: {e}")
            return "Failed to read the DOCX file."

    return "Unsupported file type."
|
128 |
+
|
129 |
+
if uploaded_file:
    st.session_state.file_text = extract_text_from_file(uploaded_file)
    logger.info(f"π Extracted from file: {uploaded_file.name}")
else:
    # No file is attached on this run: drop text kept from an earlier upload
    # so it is not silently prepended to the next prompt.
    st.session_state.file_text = ""
|
132 |
+
|
133 |
+
# Clear app
|
134 |
if st.button("π§Ή Clear All"):
    logger.info("π§Ό Reset clicked")
    # Rerunning alone keeps session state, so the previous answer and audio
    # would reappear. Reset the app-owned keys to their initial values first.
    # Widget-owned values (question box, URL field, uploader) are not touched.
    st.session_state.answer = ""
    st.session_state.audio_key = None
    st.session_state.file_text = ""
    st.session_state.key_points = []
    st.rerun()
|
137 |
|
138 |
+
# GPT streaming
|
|
|
139 |
def stream_openai_response(payload, headers, timeout=(10, 120)):
    """Yield the raw ``data:`` payloads of a streamed chat-completions call.

    Args:
        payload: JSON-serialisable request body sent to the endpoint.
        headers: HTTP headers for the request (carries the Authorization
            header built by the caller).
        timeout: ``(connect, read)`` timeout in seconds handed to
            ``requests.post`` so a stalled connection cannot block forever.

    Yields:
        str: The text following each ``data: `` prefix, decoded as UTF-8.
        The terminating ``[DONE]`` marker is yielded like any other payload;
        the caller is responsible for recognising it.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeouts.
    """
    prefix = b"data: "
    with requests.post(
        "https://api.openai.com/v1/chat/completions",
        headers=headers,
        json=payload,
        stream=True,
        timeout=timeout,
    ) as r:
        # An error response is a plain JSON body with no "data: " lines, so
        # without this check the generator would end silently and the caller
        # would go on to synthesise audio for an empty answer.
        r.raise_for_status()
        for line in r.iter_lines():
            if line and line.startswith(prefix):
                yield line[len(prefix):].decode("utf-8")
|
144 |
|
145 |
+
# Summarize
|
146 |
if st.button("π Summarize"):
|
147 |
if not query and not url and not uploaded_file:
|
148 |
st.warning("Please enter a question, a URL, or upload a file.")
|
149 |
+
logger.warning("β οΈ Summarize clicked with no input")
|
150 |
else:
|
151 |
with st.spinner("Talking to GPT..."):
|
152 |
try:
|
|
|
|
|
|
|
153 |
context = ""
|
154 |
if st.session_state.file_text:
|
155 |
context += st.session_state.file_text + "\n\n"
|
156 |
if url:
|
157 |
context += f"Summarize this page: {url}\n\n"
|
158 |
+
|
159 |
+
context += f"{tone_prompt}\n\n"
|
160 |
+
|
161 |
+
if query.strip():
|
162 |
+
context += f"Now answer this in bullet points:\n{query}"
|
163 |
+
else:
|
164 |
+
context += "Summarize the content above in bullet points."
|
165 |
|
166 |
headers = {"Authorization": f"Bearer {OPENAI_API_KEY}"}
|
167 |
payload = {
|
|
|
173 |
|
174 |
st.session_state.answer = ""
|
175 |
answer_box = st.empty()
|
176 |
+
logger.info("π§ GPT stream started")
|
177 |
|
178 |
for chunk in stream_openai_response(payload, headers):
|
179 |
if chunk.strip() == "[DONE]":
|
180 |
+
logger.info("π’ GPT done")
|
181 |
continue
|
|
|
182 |
try:
|
183 |
parsed = json.loads(chunk)
|
184 |
delta = parsed['choices'][0]['delta'].get('content', '')
|
185 |
st.session_state.answer += delta
|
186 |
+
answer_box.markdown(f'<div class="{font_class}">{st.session_state.answer}</div>', unsafe_allow_html=True)
|
187 |
+
except json.JSONDecodeError:
|
188 |
+
logger.warning(f"β οΈ Non-JSON chunk skipped: {chunk}")
|
189 |
continue
|
190 |
|
|
|
|
|
|
|
191 |
audio_key = str(uuid.uuid4())
|
192 |
generate_audio(st.session_state.answer, voice_id, audio_key)
|
193 |
st.session_state.audio_key = audio_key
|
194 |
+
logger.info(f"π§ Audio ready: {audio_key}")
|
195 |
|
196 |
except Exception as e:
|
197 |
st.error(f"π₯ Error: {e}")
|
198 |
+
logger.exception("π₯ GPT/audio failed")
|
199 |
+
|
200 |
+
# Output
|
201 |
+
if st.session_state.answer:
|
202 |
+
st.subheader("π Answer")
|
203 |
+
st.success(st.session_state.answer) # <- green background box
|
204 |
+
|
205 |
+
st.success("β
Summary ready and audio generated!")
|
206 |
|
|
|
|
|
|
|
|
|
207 |
if st.session_state.audio_key:
|
208 |
audio_path = os.path.join(AUDIO_DIR, f"{st.session_state.audio_key}.mp3")
|
209 |
if os.path.exists(audio_path):
|
210 |
st.audio(audio_path)
|
211 |
else:
|
212 |
+
st.error("β Audio file missing.")
|
213 |
+
logger.warning(f"β Missing audio file: {audio_path}")
|