awacke1 committed on
Commit
1478e25
·
verified ·
1 Parent(s): 6113e34

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +59 -34
app.py CHANGED
@@ -2,6 +2,7 @@ import streamlit as st
2
  import base64
3
  import os
4
  import random
 
5
  from PyPDF2 import PdfReader
6
  import threading
7
  import time
@@ -28,12 +29,9 @@ CHARACTERS = {
28
  "Liam": {"emoji": "🌟", "voice": "en-CA-LiamNeural"}
29
  }
30
 
31
- # Available English voices for Edge TTS
32
- EDGE_TTS_VOICES = list(CHARACTERS.values())[0]["voice"]
33
-
34
  # Initialize session state
35
  if 'tts_voice' not in st.session_state:
36
- st.session_state['tts_voice'] = random.choice(list(CHARACTERS.values()))["voice"]
37
  if 'character' not in st.session_state:
38
  st.session_state['character'] = random.choice(list(CHARACTERS.keys()))
39
  if 'history' not in st.session_state:
@@ -65,30 +63,34 @@ class AudioProcessor:
65
 
66
  async def create_audio(self, text, voice, character):
67
  cache_key = hashlib.md5(f"{text}:{voice}".encode()).hexdigest()
68
- cache_path = os.path.join(self.cache_dir, f"{cache_key}.mp3")
69
-
70
- if cache_key in self.metadata and os.path.exists(cache_path):
71
- return open(cache_path, 'rb').read()
72
-
73
  # Clean text for speech
74
  text = text.replace("\n", " ").replace("</s>", " ").strip()
75
  if not text:
76
- return None
 
 
 
 
 
 
 
 
 
 
 
 
77
 
78
  # Generate audio with edge_tts
79
  communicate = edge_tts.Communicate(text, voice)
80
- await communicate.save(cache_path)
 
 
 
 
 
81
 
82
- # Save markdown file
83
- timestamp = datetime.now().strftime("%I%M %p %m%d%Y")
84
- title_words = ' '.join(text.split()[:10])
85
- filename = f"{timestamp} {character} {title_words}.md"
86
- filepath = os.path.join(self.markdown_dir, filename)
87
- with open(filepath, 'w', encoding='utf-8') as f:
88
- f.write(f"# {title_words}\n\n**Character:** {character}\n**Voice:** {voice}\n\n{text}")
89
-
90
  # Log action
91
- self._log_action("Text to Audio", f"Created audio for '{title_words}' with {character} ({voice})")
92
 
93
  # Update metadata
94
  self.metadata[cache_key] = {
@@ -96,11 +98,12 @@ class AudioProcessor:
96
  'text_length': len(text),
97
  'voice': voice,
98
  'character': character,
99
- 'markdown_file': filename
 
100
  }
101
  self._save_metadata()
102
 
103
- return open(cache_path, 'rb').read()
104
 
105
  def get_download_link(bin_data, filename, size_mb=None):
106
  b64 = base64.b64encode(bin_data).decode()
@@ -119,14 +122,12 @@ def process_pdf(pdf_file, max_pages, voice, character, audio_processor):
119
  texts, audios = [], {}
120
 
121
  async def process_page(i, text):
122
- audio_data = await audio_processor.create_audio(text, voice, character)
123
  audios[i] = audio_data
124
 
125
- # Extract text and start audio processing
126
  for i in range(total_pages):
127
  text = reader.pages[i].extract_text()
128
  texts.append(text)
129
- # Process audio in background
130
  threading.Thread(
131
  target=lambda: asyncio.run(process_page(i, text))
132
  ).start()
@@ -200,7 +201,7 @@ def main():
200
  st.rerun()
201
 
202
  # Markdown file history
203
- st.sidebar.markdown("### 📜 History")
204
  md_files = [f for f in os.listdir(audio_processor.markdown_dir) if f.endswith('.md') and f != 'README.md']
205
  for md_file in md_files:
206
  col1, col2, col3 = st.sidebar.columns([3, 1, 1])
@@ -210,9 +211,28 @@ def main():
210
  st.session_state['current_md'] = f.read()
211
  audio_processor._log_action("View File", f"Viewed {md_file}")
212
  with col2:
213
- if st.button("🗑️", key=f"delete_{md_file}"):
214
  os.remove(os.path.join(audio_processor.markdown_dir, md_file))
215
- audio_processor._log_action("Delete File", f"Deleted {md_file}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
216
  st.rerun()
217
  with col3:
218
  st.write("")
@@ -225,9 +245,12 @@ def main():
225
  # Main interface
226
  st.markdown("<h1>📚 PDF to Audio Converter 🎧</h1>", unsafe_allow_html=True)
227
 
228
- # Display current markdown if selected
229
  if 'current_md' in st.session_state:
230
  st.markdown(st.session_state['current_md'])
 
 
 
231
 
232
  col1, col2 = st.columns(2)
233
  with col1:
@@ -251,13 +274,11 @@ def main():
251
  with st.expander(f"Page {i+1}", expanded=i==0):
252
  st.markdown(text)
253
 
254
- # Wait for audio processing
255
  while i not in audios:
256
  time.sleep(0.1)
257
  if audios[i]:
258
  st.audio(audios[i], format='audio/mp3')
259
 
260
- # Add download link
261
  if audios[i]:
262
  size_mb = len(audios[i]) / (1024 * 1024)
263
  st.sidebar.markdown(
@@ -277,7 +298,7 @@ def main():
277
 
278
  if prompt:
279
  with st.spinner('Converting text to audio...'):
280
- audio_data = asyncio.run(audio_processor.create_audio(
281
  prompt,
282
  st.session_state['tts_voice'],
283
  st.session_state['character']
@@ -287,8 +308,9 @@ def main():
287
 
288
  size_mb = len(audio_data) / (1024 * 1024)
289
  st.sidebar.markdown("### 🎵 Custom Audio")
 
290
  st.sidebar.markdown(
291
- get_download_link(audio_data, 'custom_text.mp3', size_mb),
292
  unsafe_allow_html=True
293
  )
294
 
@@ -296,9 +318,12 @@ def main():
296
  if st.sidebar.button("Clear Cache"):
297
  for file in os.listdir(audio_processor.cache_dir):
298
  os.remove(os.path.join(audio_processor.cache_dir, file))
 
 
 
299
  audio_processor.metadata = {}
300
  audio_processor._save_metadata()
301
- audio_processor._log_action("Clear Cache", "Cleared audio cache")
302
  st.sidebar.success("Cache cleared successfully!")
303
 
304
  if __name__ == "__main__":
 
2
  import base64
3
  import os
4
  import random
5
+ import glob
6
  from PyPDF2 import PdfReader
7
  import threading
8
  import time
 
29
  "Liam": {"emoji": "🌟", "voice": "en-CA-LiamNeural"}
30
  }
31
 
 
 
 
32
  # Initialize session state
33
  if 'tts_voice' not in st.session_state:
34
+ st.session_state['tts_voice'] = random.choice([char["voice"] for char in CHARACTERS.values()])
35
  if 'character' not in st.session_state:
36
  st.session_state['character'] = random.choice(list(CHARACTERS.keys()))
37
  if 'history' not in st.session_state:
 
63
 
64
  async def create_audio(self, text, voice, character):
65
  cache_key = hashlib.md5(f"{text}:{voice}".encode()).hexdigest()
 
 
 
 
 
66
  # Clean text for speech
67
  text = text.replace("\n", " ").replace("</s>", " ").strip()
68
  if not text:
69
+ return None, None
70
+
71
+ # Generate filename
72
+ timestamp = datetime.now().strftime("%I%M %p %m%d%Y")
73
+ title_words = '_'.join(text.split()[:10])
74
+ filename_base = f"{timestamp}_{character}_{title_words}"
75
+ audio_filename = f"{filename_base}.mp3"
76
+ md_filename = f"{filename_base}.md"
77
+ audio_path = os.path.join(self.cache_dir, audio_filename)
78
+
79
+ # Check cache
80
+ if cache_key in self.metadata and os.path.exists(audio_path):
81
+ return open(audio_path, 'rb').read(), cache_key
82
 
83
  # Generate audio with edge_tts
84
  communicate = edge_tts.Communicate(text, voice)
85
+ await communicate.save(audio_path)
86
+
87
+ # Save markdown
88
+ md_filepath = os.path.join(self.markdown_dir, md_filename)
89
+ with open(md_filepath, 'w', encoding='utf-8') as f:
90
+ f.write(f"# {title_words.replace('_', ' ')}\n\n**Character:** {character}\n**Voice:** {voice}\n\n{text}")
91
 
 
 
 
 
 
 
 
 
92
  # Log action
93
+ self._log_action("Text to Audio", f"Created audio and markdown for '{title_words}' with {character} ({voice})")
94
 
95
  # Update metadata
96
  self.metadata[cache_key] = {
 
98
  'text_length': len(text),
99
  'voice': voice,
100
  'character': character,
101
+ 'markdown_file': md_filename,
102
+ 'audio_file': audio_filename
103
  }
104
  self._save_metadata()
105
 
106
+ return open(audio_path, 'rb').read(), cache_key
107
 
108
  def get_download_link(bin_data, filename, size_mb=None):
109
  b64 = base64.b64encode(bin_data).decode()
 
122
  texts, audios = [], {}
123
 
124
  async def process_page(i, text):
125
+ audio_data, _ = await audio_processor.create_audio(text, voice, character)
126
  audios[i] = audio_data
127
 
 
128
  for i in range(total_pages):
129
  text = reader.pages[i].extract_text()
130
  texts.append(text)
 
131
  threading.Thread(
132
  target=lambda: asyncio.run(process_page(i, text))
133
  ).start()
 
201
  st.rerun()
202
 
203
  # Markdown file history
204
+ st.sidebar.markdown("### 📜 Markdown History")
205
  md_files = [f for f in os.listdir(audio_processor.markdown_dir) if f.endswith('.md') and f != 'README.md']
206
  for md_file in md_files:
207
  col1, col2, col3 = st.sidebar.columns([3, 1, 1])
 
211
  st.session_state['current_md'] = f.read()
212
  audio_processor._log_action("View File", f"Viewed {md_file}")
213
  with col2:
214
+ if st.button("🗑️", key=f"delete_md_{md_file}"):
215
  os.remove(os.path.join(audio_processor.markdown_dir, md_file))
216
+ audio_processor._log_action("Delete Markdown", f"Deleted {md_file}")
217
+ st.rerun()
218
+ with col3:
219
+ st.write("")
220
+
221
+ # Audio file history
222
+ st.sidebar.markdown("### 🎵 Audio History")
223
+ audio_files = [f for f in glob.glob(os.path.join(audio_processor.cache_dir, "*.mp3")) if os.path.basename(f).startswith(tuple([f.split('.')[0] for f in md_files]))]
224
+ for audio_file in audio_files:
225
+ audio_filename = os.path.basename(audio_file)
226
+ col1, col2, col3 = st.sidebar.columns([3, 1, 1])
227
+ with col1:
228
+ if st.button(f"▶️ {audio_filename}", key=f"play_{audio_filename}"):
229
+ with open(audio_file, 'rb') as f:
230
+ st.session_state['current_audio'] = {'data': f.read(), 'name': audio_filename}
231
+ audio_processor._log_action("Play Audio", f"Played {audio_filename}")
232
+ with col2:
233
+ if st.button("🗑️", key=f"delete_audio_{audio_filename}"):
234
+ os.remove(audio_file)
235
+ audio_processor._log_action("Delete Audio", f"Deleted {audio_filename}")
236
  st.rerun()
237
  with col3:
238
  st.write("")
 
245
  # Main interface
246
  st.markdown("<h1>📚 PDF to Audio Converter 🎧</h1>", unsafe_allow_html=True)
247
 
248
+ # Display current markdown or audio if selected
249
  if 'current_md' in st.session_state:
250
  st.markdown(st.session_state['current_md'])
251
+ if 'current_audio' in st.session_state:
252
+ st.markdown(f"**Playing:** {st.session_state['current_audio']['name']}")
253
+ st.audio(st.session_state['current_audio']['data'], format='audio/mp3')
254
 
255
  col1, col2 = st.columns(2)
256
  with col1:
 
274
  with st.expander(f"Page {i+1}", expanded=i==0):
275
  st.markdown(text)
276
 
 
277
  while i not in audios:
278
  time.sleep(0.1)
279
  if audios[i]:
280
  st.audio(audios[i], format='audio/mp3')
281
 
 
282
  if audios[i]:
283
  size_mb = len(audios[i]) / (1024 * 1024)
284
  st.sidebar.markdown(
 
298
 
299
  if prompt:
300
  with st.spinner('Converting text to audio...'):
301
+ audio_data, cache_key = asyncio.run(audio_processor.create_audio(
302
  prompt,
303
  st.session_state['tts_voice'],
304
  st.session_state['character']
 
308
 
309
  size_mb = len(audio_data) / (1024 * 1024)
310
  st.sidebar.markdown("### 🎵 Custom Audio")
311
+ audio_filename = audio_processor.metadata[cache_key]['audio_file']
312
  st.sidebar.markdown(
313
+ get_download_link(audio_data, audio_filename, size_mb),
314
  unsafe_allow_html=True
315
  )
316
 
 
318
  if st.sidebar.button("Clear Cache"):
319
  for file in os.listdir(audio_processor.cache_dir):
320
  os.remove(os.path.join(audio_processor.cache_dir, file))
321
+ for file in os.listdir(audio_processor.markdown_dir):
322
+ if file != 'README.md':
323
+ os.remove(os.path.join(audio_processor.markdown_dir, file))
324
  audio_processor.metadata = {}
325
  audio_processor._save_metadata()
326
+ audio_processor._log_action("Clear Cache", "Cleared audio and markdown cache")
327
  st.sidebar.success("Cache cleared successfully!")
328
 
329
  if __name__ == "__main__":