Bonosa2 committed · verified
Commit 484c797 · 1 Parent(s): 4e2165c

Upload 3 files

Files changed (3)
  1. app.py +36 -30
  2. generate_audio.py +34 -19
  3. logger_setup.py +16 -0
app.py CHANGED
@@ -1,71 +1,64 @@
 import os
 import uuid
 import json
-import logging
 import requests
 import streamlit as st
 from dotenv import load_dotenv
 from utils import voice_map, get_voice_prompt_style, AUDIO_DIR
 from generate_audio import generate_audio
+from logger_setup import logger

-# Load API keys from .env
+# Load API keys
 load_dotenv()
 OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
-ELEVENLABS_API_KEY = os.getenv("ELEVENLABS_API_KEY")

-# Ensure audio output folder exists
-os.makedirs(AUDIO_DIR, exist_ok=True)
-logging.basicConfig(filename="frontend.log", level=logging.INFO)
-
-# Streamlit UI config
+# Streamlit config
 st.set_page_config(page_title="Voice Agent Pro", page_icon="🎤")
 st.title("🎙️ Voice Agent Pro")
 st.markdown("Summarized answers with expressive AI voices.")
+logger.info("🎬 Streamlit app started")

-# Sidebar: select voice
+# Sidebar: voice picker
 st.sidebar.header("🎚️ Voice Settings")
 voice_label = st.sidebar.selectbox("Choose a voice:", list(voice_map.keys()))
 voice_id = voice_map[voice_label]
 tone_prompt = get_voice_prompt_style(voice_label)

-# App state
+# Session state setup
 if "answer" not in st.session_state: st.session_state.answer = ""
 if "audio_key" not in st.session_state: st.session_state.audio_key = None
 if "file_text" not in st.session_state: st.session_state.file_text = ""
 if "key_points" not in st.session_state: st.session_state.key_points = []

-# Input fields
+# Inputs
 query = st.text_area("🗨️ Ask or refine something based on the bullets:", value="", placeholder="e.g., What makes you so cool, Grandma?", key="query")
 url = st.text_input("🌐 Optional URL to summarize:")
 uploaded_file = st.file_uploader("📎 Or upload a file (PDF, TXT, DOCX)", type=["pdf", "txt", "docx"])

-# Reset app state
+# Reset state
+# Reset state safely without error
 if st.button("🧹 Clear All"):
-    st.session_state.query = ""
-    st.session_state.file_text = ""
-    st.session_state.answer = ""
-    st.session_state.audio_key = None
-    st.session_state.key_points = []
+    logger.info("🧼 Clear All clicked — reloading app")
+    st.rerun()
+

-# Helper: OpenAI response streaming
+# Helper: GPT streaming
 def stream_openai_response(payload, headers):
     with requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload, stream=True) as r:
         for line in r.iter_lines():
             if line and line.startswith(b"data: "):
                 yield line[len(b"data: "):].decode()

-# Main button: summarize and speak
+# Main: Summarize and speak
 if st.button("🔍 Summarize"):
     if not query and not url and not uploaded_file:
         st.warning("Please enter a question, a URL, or upload a file.")
     else:
         with st.spinner("Talking to GPT..."):
             try:
-                # Load file content if present
                 if uploaded_file:
                     st.session_state.file_text = uploaded_file.read().decode("utf-8")

-                # Build GPT prompt
                 context = ""
                 if st.session_state.file_text:
                     context += st.session_state.file_text + "\n\n"
@@ -73,7 +66,6 @@ if st.button("🔍 Summarize"):
                     context += f"Summarize this page: {url}\n\n"
                 context += f"{tone_prompt}\n\nNow answer: {query}"

-                # OpenAI request setup
                 headers = {"Authorization": f"Bearer {OPENAI_API_KEY}"}
                 payload = {
                     "model": "gpt-4o",
@@ -82,28 +74,42 @@ if st.button("🔍 Summarize"):
                     "stream": True
                 }

-                # Streaming UI update
                 st.session_state.answer = ""
                 answer_box = st.empty()
+                logger.info("🧠 Starting GPT streaming")

                 for chunk in stream_openai_response(payload, headers):
-                    parsed = json.loads(chunk)
-                    delta = parsed['choices'][0]['delta'].get('content', '')
-                    st.session_state.answer += delta
-                    answer_box.markdown(st.session_state.answer)
+                    if chunk.strip() == "[DONE]":
+                        logger.info("🟢 GPT stream complete marker received")
+                        continue
+
+                    try:
+                        parsed = json.loads(chunk)
+                        delta = parsed['choices'][0]['delta'].get('content', '')
+                        st.session_state.answer += delta
+                        answer_box.markdown(st.session_state.answer)
+                    except json.JSONDecodeError as json_err:
+                        logger.warning(f"⚠️ Skipping non-JSON chunk: {chunk}")
+                        continue
+
+
+                logger.info("🧠 GPT response complete. Now generating audio...")

-                # Generate audio from final answer
                 audio_key = str(uuid.uuid4())
                 generate_audio(st.session_state.answer, voice_id, audio_key)
                 st.session_state.audio_key = audio_key

             except Exception as e:
                 st.error(f"🔥 Error: {e}")
+                logger.exception("🔥 Exception during summarize or audio generation")

-# Final UI display
+# Final display
 if st.session_state.answer:
     st.subheader("📜 Answer")
     st.success(st.session_state.answer)
     if st.session_state.audio_key:
         audio_path = os.path.join(AUDIO_DIR, f"{st.session_state.audio_key}.mp3")
-        st.audio(audio_path)
+        if os.path.exists(audio_path):
+            st.audio(audio_path)
+        else:
+            st.error("❗ Audio file missing. Please check logs.")
generate_audio.py CHANGED
@@ -1,35 +1,50 @@
 import os
-import logging
-from elevenlabs import stream
-from elevenlabs.client import ElevenLabs
 from dotenv import load_dotenv
+from elevenlabs.client import ElevenLabs
+from logger_setup import logger
+
+# Load environment variables
 load_dotenv()

+# Use absolute path for output
+AUDIO_DIR = os.path.join(os.path.dirname(__file__), "audio_outputs")

-AUDIO_DIR = "audio_outputs"
-logger = logging.getLogger(__name__)
+# Verify API key
+api_key = os.getenv("ELEVENLABS_API_KEY")
+if not api_key:
+    logger.error("❌ ELEVENLABS_API_KEY is missing or not loaded from .env")
+    raise RuntimeError("ELEVENLABS_API_KEY missing")

-client = ElevenLabs(api_key=os.getenv("ELEVENLABS_API_KEY"))
+client = ElevenLabs(api_key=api_key)

 def generate_audio(text: str, voice_id: str, audio_key: str):
     try:
         logger.info("🎯 Starting ElevenLabs audio generation")
+        os.makedirs(AUDIO_DIR, exist_ok=True)

-        audio_stream = client.text_to_speech.convert_as_stream(
-            text=text,
-            voice_id=voice_id,
-            model_id="eleven_multilingual_v2"
-        )
+        try:
+            audio_stream = client.text_to_speech.convert_as_stream(
+                text=text,
+                voice_id=voice_id,
+                model_id="eleven_multilingual_v2"
+            )
+            logger.info("✅ Audio stream received from ElevenLabs")
+        except Exception as stream_err:
+            logger.error(f"❌ Failed to get audio stream: {stream_err}")
+            raise

-        os.makedirs(AUDIO_DIR, exist_ok=True)
         output_path = os.path.join(AUDIO_DIR, f"{audio_key}.mp3")

-        with open(output_path, "wb") as f:
-            for chunk in audio_stream:
-                if isinstance(chunk, bytes):
-                    f.write(chunk)
-
-        logger.info(f"✅ Audio saved to {output_path}")
+        try:
+            with open(output_path, "wb") as f:
+                for chunk in audio_stream:
+                    if isinstance(chunk, bytes):
+                        f.write(chunk)
+            logger.info(f"✅ Audio saved to {output_path}")
+        except Exception as write_err:
+            logger.error(f"❌ Failed to save audio to file: {write_err}")
+            raise

     except Exception as e:
-        logger.error(f"🔥 Error generating audio: {e}")
+        logger.exception("🔥 Exception in generate_audio")
+        raise
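A possible caller for the updated generate_audio.py, assuming the elevenlabs package is installed and ELEVENLABS_API_KEY is present in .env. The voice id below is a placeholder, not a value shipped with this commit (the app itself takes voice ids from utils.voice_map).

# Hypothetical caller for generate_audio.py (not part of this commit).
import os
import uuid

from generate_audio import generate_audio, AUDIO_DIR

if __name__ == "__main__":
    audio_key = str(uuid.uuid4())
    # Placeholder voice id: substitute one from your ElevenLabs account or utils.voice_map.
    generate_audio("Hello from Voice Agent Pro.", voice_id="YOUR_VOICE_ID", audio_key=audio_key)

    output_path = os.path.join(AUDIO_DIR, f"{audio_key}.mp3")
    print("Saved:" if os.path.exists(output_path) else "Missing:", output_path)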
logger_setup.py ADDED
@@ -0,0 +1,16 @@
+# logger_setup.py
+import logging
+import os
+
+LOG_FILE = os.path.join(os.path.dirname(__file__), "logfile.log")
+
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s [%(levelname)s] %(name)s - %(message)s",
+    handlers=[
+        logging.FileHandler(LOG_FILE, mode='a', encoding='utf-8'),
+        logging.StreamHandler()
+    ]
+)
+
+logger = logging.getLogger("voice-agent")
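Because logging.basicConfig() runs the first time logger_setup is imported, app.py and generate_audio.py share the same file handler (logfile.log) and console handler; later basicConfig calls are no-ops once those handlers exist. A minimal usage sketch:

# Any module in the project picks up the shared configuration by importing the logger.
from logger_setup import logger

logger.info("This line goes to logfile.log and to the console.")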