Bonosa2 commited on
Commit
78e0e7a
·
verified ·
1 Parent(s): 92501d4

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +131 -33
app.py CHANGED
@@ -1,9 +1,13 @@
1
  import os
2
  import uuid
3
  import json
 
4
  import requests
5
  import streamlit as st
 
 
6
  from dotenv import load_dotenv
 
7
  from utils import voice_map, get_voice_prompt_style, AUDIO_DIR
8
  from generate_audio import generate_audio
9
  from logger_setup import logger
@@ -11,60 +15,153 @@ from logger_setup import logger
11
  # Load API keys
12
  load_dotenv()
13
  OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
 
 
14
 
15
  # Streamlit config
16
- st.set_page_config(page_title="Voice Agent Pro", page_icon="🎀")
17
- st.title("πŸŽ™οΈ Voice Agent Pro")
18
- st.markdown("Summarized answers with expressive AI voices.")
19
  logger.info("🎬 Streamlit app started")
20
 
21
- # Sidebar: voice picker
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  st.sidebar.header("🎚️ Voice Settings")
23
  voice_label = st.sidebar.selectbox("Choose a voice:", list(voice_map.keys()))
24
  voice_id = voice_map[voice_label]
25
  tone_prompt = get_voice_prompt_style(voice_label)
26
-
27
- # Session state setup
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  if "answer" not in st.session_state: st.session_state.answer = ""
29
  if "audio_key" not in st.session_state: st.session_state.audio_key = None
30
  if "file_text" not in st.session_state: st.session_state.file_text = ""
31
  if "key_points" not in st.session_state: st.session_state.key_points = []
32
 
33
  # Inputs
34
- query = st.text_area("πŸ—¨οΈ Ask or refine something based on the bullets:", value="", placeholder="e.g., What the heck are LLMS? Give me a 101", key="query")
35
- url = st.text_input("🌐 Optional URL to summarize:")
36
  uploaded_file = st.file_uploader("πŸ“Ž Or upload a file (PDF, TXT, DOCX)", type=["pdf", "txt", "docx"])
37
 
38
- # Reset state
39
- # Reset state safely without error
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  if st.button("🧹 Clear All"):
41
- logger.info("🧼 Clear All clicked β€” reloading app")
42
  st.rerun()
43
 
44
-
45
- # Helper: GPT streaming
46
  def stream_openai_response(payload, headers):
47
  with requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload, stream=True) as r:
48
  for line in r.iter_lines():
49
  if line and line.startswith(b"data: "):
50
  yield line[len(b"data: "):].decode()
51
 
52
- # Main: Summarize and speak
53
  if st.button("πŸ” Summarize"):
54
  if not query and not url and not uploaded_file:
55
  st.warning("Please enter a question, a URL, or upload a file.")
 
56
  else:
57
  with st.spinner("Talking to GPT..."):
58
  try:
59
- if uploaded_file:
60
- st.session_state.file_text = uploaded_file.read().decode("utf-8")
61
-
62
  context = ""
63
  if st.session_state.file_text:
64
  context += st.session_state.file_text + "\n\n"
65
  if url:
66
  context += f"Summarize this page: {url}\n\n"
67
- context += f"{tone_prompt}\n\nNow answer: {query}"
 
 
 
 
 
 
68
 
69
  headers = {"Authorization": f"Bearer {OPENAI_API_KEY}"}
70
  payload = {
@@ -76,40 +173,41 @@ if st.button("πŸ” Summarize"):
76
 
77
  st.session_state.answer = ""
78
  answer_box = st.empty()
79
- logger.info("🧠 Starting GPT streaming")
80
 
81
  for chunk in stream_openai_response(payload, headers):
82
  if chunk.strip() == "[DONE]":
83
- logger.info("🟒 GPT stream complete marker received")
84
  continue
85
-
86
  try:
87
  parsed = json.loads(chunk)
88
  delta = parsed['choices'][0]['delta'].get('content', '')
89
  st.session_state.answer += delta
90
- answer_box.markdown(st.session_state.answer)
91
- except json.JSONDecodeError as json_err:
92
- logger.warning(f"⚠️ Skipping non-JSON chunk: {chunk}")
93
  continue
94
 
95
-
96
- logger.info("🧠 GPT response complete. Now generating audio...")
97
-
98
  audio_key = str(uuid.uuid4())
99
  generate_audio(st.session_state.answer, voice_id, audio_key)
100
  st.session_state.audio_key = audio_key
 
101
 
102
  except Exception as e:
103
  st.error(f"πŸ”₯ Error: {e}")
104
- logger.exception("πŸ”₯ Exception during summarize or audio generation")
 
 
 
 
 
 
 
105
 
106
- # Final display
107
- if st.session_state.answer:
108
- st.subheader("πŸ“œ Answer")
109
- st.success(st.session_state.answer)
110
  if st.session_state.audio_key:
111
  audio_path = os.path.join(AUDIO_DIR, f"{st.session_state.audio_key}.mp3")
112
  if os.path.exists(audio_path):
113
  st.audio(audio_path)
114
  else:
115
- st.error("❗ Audio file missing. Please check logs.")
 
 
1
  import os
2
  import uuid
3
  import json
4
+ import fitz
5
  import requests
6
  import streamlit as st
7
+ from io import BytesIO
8
+ from docx import Document
9
  from dotenv import load_dotenv
10
+ from elevenlabs.client import ElevenLabs
11
  from utils import voice_map, get_voice_prompt_style, AUDIO_DIR
12
  from generate_audio import generate_audio
13
  from logger_setup import logger
 
# Load API keys from .env into the environment, then build the TTS client.
load_dotenv()
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
ELEVENLABS_API_KEY = os.getenv("ELEVENLABS_API_KEY")
client = ElevenLabs(api_key=ELEVENLABS_API_KEY)

# Streamlit page configuration must run before any other st.* call.
st.set_page_config(page_title="Voice Agent Pro", page_icon="🎧")
logger.info("🎬 Streamlit app started")
24
 
# Page-wide styling: bigger fonts for title/answer/inputs plus an
# instructions banner; classes are referenced by the HTML snippets below.
st.markdown("""
<style>
.big-title {
    font-size: 2.4em !important;
    font-weight: bold;
    color: #333333;
    text-align: center;
}
.big-answer {
    font-size: 1.6em;
    line-height: 1.5;
    color: #111;
}
textarea, input {
    font-size: 1.2em !important;
}
.instructions {
    font-size: 1.1em;
    padding: 0.5em;
    background-color: #f0f4ff;
    border-radius: 0.5em;
    margin-bottom: 1em;
}
</style>
""", unsafe_allow_html=True)

# Header and usage hint, rendered as HTML so the CSS classes above apply.
st.markdown('<div class="big-title">🎧 Voice Agent Pro</div>', unsafe_allow_html=True)
st.markdown('<div class="instructions">Ask a question <b>OR</b> paste a URL <b>OR</b> upload a file — and I’ll summarize it in bullet points with expressive AI narration!</div>', unsafe_allow_html=True)
54
# Sidebar controls: voice choice drives both the TTS voice id and the
# tone of the GPT prompt; the font radio toggles the .big-answer class.
st.sidebar.header("🎚️ Voice Settings")
voice_label = st.sidebar.selectbox("Choose a voice:", list(voice_map.keys()))
voice_id = voice_map[voice_label]
tone_prompt = get_voice_prompt_style(voice_label)
font_size = st.sidebar.radio("Font Size", ["Normal", "Large"])
font_class = "big-answer" if font_size == "Large" else ""
62
# One short sample line per voice, shown (and spoken below) as a preview.
preview_lines = {
    "grandma GG": "Back in my day, we didn’t need AI to sound this fabulous.",
    "tech wizard": "System online. You may now enter your query, human.",
    "perky sidekick": "You got this! Let’s answer that question together!",
    "bill the newscaster": "Breaking news — you’ve just selected the perfect voice.",
    "spunky charlie": "Whoa! Is it story time already? Let’s go!",
    "sassy teen": "Seriously? You better ask something cool.",
}

# Fallback covers any voice_map entry without a matching sample line.
preview_line = preview_lines.get(voice_label, "Testing voice.")
st.markdown(f"🎧 <b>{voice_label}</b> says:", unsafe_allow_html=True)
st.markdown(f"_“{preview_line}”_", unsafe_allow_html=True)
76
# Voice preview: stream the sample line from ElevenLabs into memory and
# hand the collected bytes to st.audio (no autoplay). Preview failures are
# non-fatal — the main summarize/narrate flow still works without it.
try:
    audio_stream = client.text_to_speech.convert_as_stream(
        text=preview_line,
        voice_id=voice_id,
        model_id="eleven_multilingual_v2"
    )
    buffer = BytesIO()
    for piece in audio_stream:
        # The stream may interleave non-bytes events; keep only raw audio.
        if isinstance(piece, bytes):
            buffer.write(piece)
    st.audio(buffer.getvalue())
except Exception:
    st.warning("Voice preview unavailable.")
    logger.exception("🎧 Voice preview error")
92
# Session-state defaults: seed each key only on first run so values
# survive Streamlit reruns without being clobbered.
for _key, _default in (
    ("answer", ""),
    ("audio_key", None),
    ("file_text", ""),
    ("key_points", []),
):
    if _key not in st.session_state:
        st.session_state[_key] = _default
98
 
99
# Inputs: a free-text question, an optional URL, and an optional document.
query = st.text_area("🗨️ Ask your question:", value="", placeholder="Ask your question", key="query")
url = st.text_input("🌐 Or paste a URL:")
uploaded_file = st.file_uploader("📎 Or upload a file (PDF, TXT, DOCX)", type=["pdf", "txt", "docx"])
103
 
104
# File reader: pull plain text out of an uploaded PDF, TXT, or DOCX.
def extract_text_from_file(file):
    """Return the text content of *file*, or a human-readable error string.

    The file type is taken from the filename extension (case-insensitive).
    PDF is read via PyMuPDF, DOCX via python-docx; TXT is decoded as UTF-8
    with undecodable bytes dropped. Read failures and unknown extensions
    yield fixed message strings rather than raising.
    """
    suffix = file.name.rpartition('.')[-1].lower()

    if suffix == "pdf":
        try:
            # fitz accepts the raw upload bytes as an in-memory stream.
            with fitz.open(stream=file.read(), filetype="pdf") as doc:
                return "\n".join(page.get_text() for page in doc)
        except Exception as e:
            logger.error(f"❌ PDF read failed: {e}")
            return "Failed to read the PDF."

    if suffix == "txt":
        return file.read().decode("utf-8", errors="ignore")

    if suffix == "docx":
        try:
            paragraphs = Document(file).paragraphs
            return "\n".join(p.text for p in paragraphs)
        except Exception as e:
            logger.error(f"❌ DOCX read failed: {e}")
            return "Failed to read the DOCX file."

    return "Unsupported file type."
128
# Extract text immediately on upload and stash it in session state so the
# content survives the rerun triggered by the Summarize button.
if uploaded_file:
    st.session_state.file_text = extract_text_from_file(uploaded_file)
    logger.info(f"📄 Extracted from file: {uploaded_file.name}")
132
+
133
# Clear app: a full rerun resets the widgets and recomputes everything.
if st.button("🧹 Clear All"):
    logger.info("🧼 Reset clicked")
    st.rerun()
137
 
138
# GPT streaming helper
def stream_openai_response(payload, headers, timeout=60):
    """Stream an OpenAI chat-completions response as raw SSE data strings.

    Args:
        payload: JSON body for the /v1/chat/completions request.
        headers: HTTP headers (must carry the Authorization bearer token).
        timeout: connect/read timeout in seconds (default 60). The original
            call had none, so a stalled connection could hang the app forever.

    Yields:
        The decoded payload of each "data: ..." server-sent-event line,
        including the final "[DONE]" marker, which the caller must skip.

    Raises:
        requests.HTTPError: on a non-2xx response. Previously an error body
            contained no "data: " lines, so auth/quota failures streamed
            nothing and were silently swallowed; now they surface and are
            caught by the caller's except/st.error handler.
    """
    with requests.post(
        "https://api.openai.com/v1/chat/completions",
        headers=headers,
        json=payload,
        stream=True,
        timeout=timeout,
    ) as r:
        r.raise_for_status()  # fail loudly instead of yielding an empty stream
        for line in r.iter_lines():
            if line and line.startswith(b"data: "):
                yield line[len(b"data: "):].decode()
144
 
145
+ # Summarize
146
  if st.button("πŸ” Summarize"):
147
  if not query and not url and not uploaded_file:
148
  st.warning("Please enter a question, a URL, or upload a file.")
149
+ logger.warning("⚠️ Summarize clicked with no input")
150
  else:
151
  with st.spinner("Talking to GPT..."):
152
  try:
 
 
 
153
  context = ""
154
  if st.session_state.file_text:
155
  context += st.session_state.file_text + "\n\n"
156
  if url:
157
  context += f"Summarize this page: {url}\n\n"
158
+
159
+ context += f"{tone_prompt}\n\n"
160
+
161
+ if query.strip():
162
+ context += f"Now answer this in bullet points:\n{query}"
163
+ else:
164
+ context += "Summarize the content above in bullet points."
165
 
166
  headers = {"Authorization": f"Bearer {OPENAI_API_KEY}"}
167
  payload = {
 
173
 
174
  st.session_state.answer = ""
175
  answer_box = st.empty()
176
+ logger.info("🧠 GPT stream started")
177
 
178
  for chunk in stream_openai_response(payload, headers):
179
  if chunk.strip() == "[DONE]":
180
+ logger.info("🟒 GPT done")
181
  continue
 
182
  try:
183
  parsed = json.loads(chunk)
184
  delta = parsed['choices'][0]['delta'].get('content', '')
185
  st.session_state.answer += delta
186
+ answer_box.markdown(f'<div class="{font_class}">{st.session_state.answer}</div>', unsafe_allow_html=True)
187
+ except json.JSONDecodeError:
188
+ logger.warning(f"⚠️ Non-JSON chunk skipped: {chunk}")
189
  continue
190
 
 
 
 
191
  audio_key = str(uuid.uuid4())
192
  generate_audio(st.session_state.answer, voice_id, audio_key)
193
  st.session_state.audio_key = audio_key
194
+ logger.info(f"🎧 Audio ready: {audio_key}")
195
 
196
  except Exception as e:
197
  st.error(f"πŸ”₯ Error: {e}")
198
+ logger.exception("πŸ”₯ GPT/audio failed")
199
+
200
# Output: render the finished answer, then the generated narration if the
# mp3 written by generate_audio() is actually on disk.
if st.session_state.answer:
    st.subheader("📜 Answer")
    st.success(st.session_state.answer)  # <- green background box

    st.success("✅ Summary ready and audio generated!")

if st.session_state.audio_key:
    audio_path = os.path.join(AUDIO_DIR, f"{st.session_state.audio_key}.mp3")
    if os.path.exists(audio_path):
        st.audio(audio_path)
    else:
        st.error("❗ Audio file missing.")
        logger.warning(f"❌ Missing audio file: {audio_path}")