awacke1 committed on
Commit
6113e34
·
verified ·
1 Parent(s): a3d5e91

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +305 -0
app.py ADDED
@@ -0,0 +1,305 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import base64
3
+ import os
4
+ import random
5
+ from PyPDF2 import PdfReader
6
+ import threading
7
+ import time
8
+ import hashlib
9
+ from datetime import datetime
10
+ import json
11
+ import asyncio
12
+ import edge_tts
13
+
14
+ # Patch asyncio for nested event loops
15
+ import nest_asyncio
16
+ nest_asyncio.apply()
17
+
18
# Character definitions: each persona maps to a display emoji and the Edge TTS
# voice ID used to synthesize its speech.
# NOTE(review): some emoji literals in this file look mis-encoded (e.g. the
# "Ryan" entry) — confirm against the original source before changing them.
CHARACTERS = {
    "Aria": {"emoji": "🌸", "voice": "en-US-AriaNeural"},
    "Jenny": {"emoji": "🎢", "voice": "en-US-JennyNeural"},
    "Sonia": {"emoji": "🌺", "voice": "en-GB-SoniaNeural"},
    "Natasha": {"emoji": "🌌", "voice": "en-AU-NatashaNeural"},
    "Clara": {"emoji": "🌷", "voice": "en-CA-ClaraNeural"},
    "Guy": {"emoji": "🌟", "voice": "en-US-GuyNeural"},
    "Ryan": {"emoji": "πŸ› οΈ", "voice": "en-GB-RyanNeural"},
    "William": {"emoji": "🎻", "voice": "en-AU-WilliamNeural"},
    "Liam": {"emoji": "🌟", "voice": "en-CA-LiamNeural"},
}

# Available English voices for Edge TTS: every voice ID, in CHARACTERS order.
# (Previously this held only the first voice string, contradicting its name.)
EDGE_TTS_VOICES = [info["voice"] for info in CHARACTERS.values()]

# Initialize session state.
# The voice is derived from the randomly chosen character so the two start out
# consistent; drawing them independently made the first run look like a voice
# change (extra log entry plus a rerun).
if 'character' not in st.session_state:
    st.session_state['character'] = random.choice(list(CHARACTERS))
if 'tts_voice' not in st.session_state:
    st.session_state['tts_voice'] = CHARACTERS[st.session_state['character']]["voice"]
if 'history' not in st.session_state:
    st.session_state['history'] = []
41
+
42
class AudioProcessor:
    """Synthesize speech with Edge TTS and keep an on-disk cache of results.

    Audio is stored as ``<md5 of "text:voice">.mp3`` inside ``audio_cache``,
    indexed by ``audio_cache/metadata.json``. Every new synthesis also writes
    a markdown copy of the spoken text into ``markdown_files`` and appends a
    line to ``history_log.md`` and to ``st.session_state['history']``.
    """

    # Characters that are invalid or unsafe in file names on common platforms.
    _UNSAFE_FILENAME_CHARS = '<>:"/\\|?*'

    def __init__(self):
        """Create the working directories and load the cache index."""
        self.cache_dir = "audio_cache"
        self.markdown_dir = "markdown_files"
        self.log_file = "history_log.md"
        os.makedirs(self.cache_dir, exist_ok=True)
        os.makedirs(self.markdown_dir, exist_ok=True)
        self.metadata = self._load_metadata()

    @staticmethod
    def _safe_filename(stem, max_len=200):
        """Return *stem* with path-unsafe characters replaced by ``_``.

        The result is truncated to *max_len* characters so that long inputs
        cannot exceed file-system name limits.
        """
        cleaned = ''.join(
            '_' if (ch in AudioProcessor._UNSAFE_FILENAME_CHARS or ord(ch) < 32) else ch
            for ch in stem
        )
        return cleaned[:max_len].rstrip()

    @staticmethod
    def _read_bytes(path):
        """Read a whole file as bytes, closing the handle deterministically."""
        with open(path, 'rb') as f:
            return f.read()

    def _load_metadata(self):
        """Return the cache index as a dict, or ``{}`` when missing/unreadable."""
        metadata_file = os.path.join(self.cache_dir, "metadata.json")
        if not os.path.exists(metadata_file):
            return {}
        try:
            with open(metadata_file, encoding='utf-8') as f:
                return json.load(f)
        except json.JSONDecodeError:
            # The index is only a cache; a truncated/corrupt file should not
            # take the app down. Entries are rebuilt as audio is regenerated.
            return {}

    def _save_metadata(self):
        """Persist the in-memory cache index to ``metadata.json``."""
        metadata_file = os.path.join(self.cache_dir, "metadata.json")
        with open(metadata_file, 'w', encoding='utf-8') as f:
            json.dump(self.metadata, f)

    def _log_action(self, action, details):
        """Append a timestamped entry to the log file and the session history."""
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        with open(self.log_file, 'a', encoding='utf-8') as f:
            f.write(f"[{timestamp}] {action}: {details}\n")
        st.session_state['history'].append(f"[{timestamp}] {action}: {details}")

    async def create_audio(self, text, voice, character):
        """Return MP3 bytes for *text* spoken with *voice*.

        A cached file is returned when the index knows the key and the file
        still exists. Otherwise the text is cleaned, synthesized, saved to the
        cache, mirrored as a markdown file, logged, and indexed.

        Returns ``None`` when there is no speakable text.
        """
        if text is None:
            return None

        # The key is derived from the raw (uncleaned) text, as before, so
        # existing cache entries stay valid.
        cache_key = hashlib.md5(f"{text}:{voice}".encode()).hexdigest()
        cache_path = os.path.join(self.cache_dir, f"{cache_key}.mp3")

        if cache_key in self.metadata and os.path.exists(cache_path):
            return self._read_bytes(cache_path)

        # Clean text for speech
        text = text.replace("\n", " ").replace("</s>", " ").strip()
        if not text:
            return None

        # Generate audio with edge_tts
        communicate = edge_tts.Communicate(text, voice)
        await communicate.save(cache_path)

        # Save markdown file. The name embeds user text, so it is sanitized to
        # avoid path separators and over-long names breaking open().
        timestamp = datetime.now().strftime("%I%M %p %m%d%Y")
        title_words = ' '.join(text.split()[:10])
        filename = self._safe_filename(f"{timestamp} {character} {title_words}") + ".md"
        filepath = os.path.join(self.markdown_dir, filename)
        with open(filepath, 'w', encoding='utf-8') as f:
            f.write(f"# {title_words}\n\n**Character:** {character}\n**Voice:** {voice}\n\n{text}")

        # Log action
        self._log_action("Text to Audio", f"Created audio for '{title_words}' with {character} ({voice})")

        # Update metadata
        self.metadata[cache_key] = {
            'timestamp': datetime.now().isoformat(),
            'text_length': len(text),
            'voice': voice,
            'character': character,
            'markdown_file': filename
        }
        self._save_metadata()

        return self._read_bytes(cache_path)
104
+
105
def get_download_link(bin_data, filename, size_mb=None):
    """Build an HTML snippet with a base64 ``data:`` download link for audio.

    Args:
        bin_data: Raw bytes of the file to embed.
        filename: Name offered to the browser and shown as the link text. It
            is HTML-escaped because the snippet is rendered with
            ``unsafe_allow_html=True``.
        size_mb: Optional size in megabytes shown under the link; omitted
            from the output only when it is ``None``.

    Returns:
        The HTML markup as a string.
    """
    import html  # local import: only this helper needs it

    b64 = base64.b64encode(bin_data).decode()
    safe_name = html.escape(str(filename), quote=True)
    # "is not None" so that a legitimate size of 0 is still displayed.
    size_str = f"({size_mb:.1f} MB)" if size_mb is not None else ""
    return f'''
    <div class="download-container">
    <a href="data:audio/mpeg;base64,{b64}"
    download="{safe_name}" class="download-link">πŸ“₯ {safe_name}</a>
    <div class="file-info">{size_str}</div>
    </div>
    '''
115
+
116
def process_pdf(pdf_file, max_pages, voice, character, audio_processor):
    """Extract page text from a PDF and start audio synthesis for each page.

    Args:
        pdf_file: File path or file-like object accepted by ``PdfReader``.
        max_pages: Upper bound on the number of pages to process.
        voice: Edge TTS voice ID passed to ``audio_processor.create_audio``.
        character: Character name passed to ``audio_processor.create_audio``.
        audio_processor: Object exposing an async ``create_audio`` method.

    Returns:
        ``(texts, audios, total_pages)``. ``texts`` is complete on return.
        ``audios`` maps page index to audio bytes (or ``None``) and is filled
        in by background threads, so entries may appear after this returns.
    """
    import logging  # local import: only the worker error path needs it

    reader = PdfReader(pdf_file)
    total_pages = min(len(reader.pages), max_pages)
    texts, audios = [], {}

    def synthesize_page(page_index, page_text):
        # Always publish a result for the page, even on failure, so that a
        # caller polling ``audios`` is never left waiting for a missing key.
        try:
            audios[page_index] = asyncio.run(
                audio_processor.create_audio(page_text, voice, character)
            )
        except Exception:
            logging.getLogger(__name__).exception(
                "Audio generation failed for page %d", page_index + 1
            )
            audios[page_index] = None

    # Extract text and start audio processing
    for i in range(total_pages):
        # extract_text() may yield nothing for image-only pages.
        text = reader.pages[i].extract_text() or ""
        texts.append(text)
        # Pass the page index and text as thread arguments: a closure over the
        # loop variables would late-bind and could process the wrong page.
        threading.Thread(target=synthesize_page, args=(i, text)).start()

    return texts, audios, total_pages
135
+
136
def main():
    """Render the Streamlit app.

    Builds the sidebar (voice selection, markdown file history, action log,
    cache management) and the main area (PDF-to-audio conversion and
    free-text-to-audio conversion).
    """
    st.set_page_config(page_title="πŸ“šPDF πŸͺ„Text to πŸ—£οΈSpeech πŸ€–Transformer", page_icon="πŸ“š", layout="wide")

    # Apply styling
    st.markdown("""
    <style>
    .download-link {
    color: #1E90FF;
    text-decoration: none;
    padding: 8px 12px;
    margin: 5px;
    border: 1px solid #1E90FF;
    border-radius: 5px;
    display: inline-block;
    transition: all 0.3s ease;
    }
    .download-link:hover {
    background-color: #1E90FF;
    color: white;
    }
    .file-info {
    font-size: 0.8em;
    color: gray;
    margin-top: 4px;
    }
    </style>
    """, unsafe_allow_html=True)

    # Initialize processor
    audio_processor = AudioProcessor()

    # Sidebar settings
    st.sidebar.title(f"{CHARACTERS[st.session_state['character']]['emoji']} Character Name: {st.session_state['character']}")

    # Voice selection UI
    st.sidebar.markdown("### 🎀 Voice Settings")
    voice_options = [char["voice"] for char in CHARACTERS.values()]
    selected_voice = st.sidebar.selectbox(
        "πŸ‘„ Select TTS Voice:",
        options=voice_options,
        index=voice_options.index(st.session_state['tts_voice']),
        key="voice_select"
    )
    selected_character = next(char for char, info in CHARACTERS.items() if info["voice"] == selected_voice)

    st.sidebar.markdown("""
    # πŸŽ™οΈ Voice Character Agent Selector 🎭
    *Female Voices*:
    - 🌸 **Aria** – Elegant, creative storytelling
    - 🎢 **Jenny** – Friendly, conversational
    - 🌺 **Sonia** – Bold, confident
    - 🌌 **Natasha** – Sophisticated, mysterious
    - 🌷 **Clara** – Cheerful, empathetic

    *Male Voices*:
    - 🌟 **Guy** – Authoritative, versatile
    - πŸ› οΈ **Ryan** – Approachable, casual
    - 🎻 **William** – Classic, scholarly
    - 🌟 **Liam** – Energetic, engaging
    """)

    # Persist a changed selection and rerun so the sidebar title reflects it.
    if selected_voice != st.session_state['tts_voice'] or selected_character != st.session_state['character']:
        st.session_state['tts_voice'] = selected_voice
        st.session_state['character'] = selected_character
        audio_processor._log_action("Voice Change", f"Changed to {selected_character} ({selected_voice})")
        st.rerun()

    # Markdown file history (sorted so the listing order is stable between runs)
    st.sidebar.markdown("### πŸ“œ History")
    md_files = sorted(
        f for f in os.listdir(audio_processor.markdown_dir)
        if f.endswith('.md') and f != 'README.md'
    )
    for md_file in md_files:
        md_path = os.path.join(audio_processor.markdown_dir, md_file)
        col1, col2, col3 = st.sidebar.columns([3, 1, 1])
        with col1:
            if st.button(f"πŸ‘οΈ {md_file}", key=f"view_{md_file}"):
                with open(md_path, 'r', encoding='utf-8') as f:
                    st.session_state['current_md'] = f.read()
                audio_processor._log_action("View File", f"Viewed {md_file}")
        with col2:
            if st.button("πŸ—‘οΈ", key=f"delete_{md_file}"):
                os.remove(md_path)
                audio_processor._log_action("Delete File", f"Deleted {md_file}")
                st.rerun()
        with col3:
            st.write("")

    # History log
    st.sidebar.markdown("### πŸ“‹ Action History")
    for entry in st.session_state['history']:
        st.sidebar.write(entry)

    # Main interface
    st.markdown("<h1>πŸ“š PDF to Audio Converter 🎧</h1>", unsafe_allow_html=True)

    # Display current markdown if selected
    if 'current_md' in st.session_state:
        st.markdown(st.session_state['current_md'])

    col1, col2 = st.columns(2)
    with col1:
        uploaded_file = st.file_uploader("Choose a PDF file", "pdf")
    with col2:
        max_pages = st.slider('Select pages to process', min_value=1, max_value=100, value=10)

    if uploaded_file:
        progress_bar = st.progress(0)
        status = st.empty()
        # Upper bound on how long to wait for one page's audio. Without it, a
        # background worker that fails would leave this loop polling forever.
        page_audio_timeout_s = 120

        with st.spinner('Processing PDF...'):
            texts, audios, total_pages = process_pdf(
                uploaded_file, max_pages,
                st.session_state['tts_voice'],
                st.session_state['character'],
                audio_processor
            )

            for i, text in enumerate(texts):
                with st.expander(f"Page {i+1}", expanded=i==0):
                    st.markdown(text)

                    # Wait for audio processing (bounded by the deadline)
                    deadline = time.monotonic() + page_audio_timeout_s
                    while i not in audios and time.monotonic() < deadline:
                        time.sleep(0.1)

                    page_audio = audios.get(i)
                    if page_audio:
                        st.audio(page_audio, format='audio/mp3')

                        # Add download link
                        size_mb = len(page_audio) / (1024 * 1024)
                        st.sidebar.markdown(
                            get_download_link(page_audio, f'page_{i+1}.mp3', size_mb),
                            unsafe_allow_html=True
                        )
                    elif i not in audios:
                        st.warning(f"Audio for page {i+1} was not ready after {page_audio_timeout_s} seconds.")

                progress_bar.progress((i + 1) / total_pages)
                status.text(f"Processing page {i+1}/{total_pages}")

            st.success(f"βœ… Successfully processed {total_pages} pages!")
            audio_processor._log_action("PDF Processed", f"Processed {uploaded_file.name} ({total_pages} pages)")

    # Text to Audio section
    st.markdown("### ✍️ Text to Audio")
    prompt = st.text_area("Enter text to convert to audio", height=200)

    if prompt:
        with st.spinner('Converting text to audio...'):
            audio_data = asyncio.run(audio_processor.create_audio(
                prompt,
                st.session_state['tts_voice'],
                st.session_state['character']
            ))
            if audio_data:
                st.audio(audio_data, format='audio/mp3')

                size_mb = len(audio_data) / (1024 * 1024)
                st.sidebar.markdown("### 🎡 Custom Audio")
                st.sidebar.markdown(
                    get_download_link(audio_data, 'custom_text.mp3', size_mb),
                    unsafe_allow_html=True
                )

    # Cache management
    if st.sidebar.button("Clear Cache"):
        for file in os.listdir(audio_processor.cache_dir):
            cached_path = os.path.join(audio_processor.cache_dir, file)
            # os.remove() raises on directories; only delete regular files.
            if os.path.isfile(cached_path):
                os.remove(cached_path)
        audio_processor.metadata = {}
        audio_processor._save_metadata()
        audio_processor._log_action("Clear Cache", "Cleared audio cache")
        st.sidebar.success("Cache cleared successfully!")


if __name__ == "__main__":
    main()