awacke1 committed on
Commit
e7eeb32
·
verified ·
1 Parent(s): 2573c51

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +232 -0
app.py ADDED
@@ -0,0 +1,232 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import base64
3
+ import os
4
+ from PyPDF2 import PdfReader
5
+ import threading
6
+ import time
7
+ import hashlib
8
+ from datetime import datetime
9
+ import json
10
+ import asyncio
11
+ import edge_tts
12
+
13
+ # Patch asyncio for nested event loops
14
+ import nest_asyncio
15
+ nest_asyncio.apply()
16
+
17
# English Edge TTS voices offered in the sidebar selector, grouped by locale.
# The first entry is used as the default voice.
EDGE_TTS_VOICES = [
    # United States
    "en-US-AriaNeural",
    "en-US-GuyNeural",
    "en-US-JennyNeural",
    # United Kingdom
    "en-GB-SoniaNeural",
    "en-GB-RyanNeural",
    # Australia
    "en-AU-NatashaNeural",
    "en-AU-WilliamNeural",
    # Canada
    "en-CA-ClaraNeural",
    "en-CA-LiamNeural",
]
29
+
30
# Seed the per-session voice choice with the first catalogue entry; an
# already-stored choice is left untouched.
if "tts_voice" not in st.session_state:
    st.session_state["tts_voice"] = EDGE_TTS_VOICES[0]
33
+
34
class AudioProcessor:
    """Disk-backed cache of Edge TTS audio keyed by (text, voice).

    Each clip is stored as ``<md5 of "text:voice">.mp3`` inside ``cache_dir``;
    a ``metadata.json`` file in the same directory records, per cache key,
    the creation timestamp, the cleaned text length and the voice.
    """

    # Shared by every instance: create_audio() is invoked from several worker
    # threads at once, and the metadata dict must not be mutated while another
    # thread is serialising it. Re-entrant so create_audio() can hold it while
    # calling _save_metadata().
    _metadata_lock = threading.RLock()

    def __init__(self):
        """Create the cache directory if needed and load the metadata index."""
        self.cache_dir = "audio_cache"
        os.makedirs(self.cache_dir, exist_ok=True)
        self.metadata = self._load_metadata()

    def _metadata_path(self):
        """Return the path of the JSON metadata index inside the cache dir."""
        return os.path.join(self.cache_dir, "metadata.json")

    @staticmethod
    def _read_bytes(path):
        """Return the full binary content of *path*, closing the file."""
        with open(path, "rb") as f:
            return f.read()

    def _load_metadata(self):
        """Return the metadata index, or ``{}`` if it is missing or unreadable.

        A truncated or otherwise invalid index only costs cache hits, so it is
        treated like an empty one instead of crashing the app at start-up.
        """
        try:
            with open(self._metadata_path(), encoding="utf-8") as f:
                loaded = json.load(f)
        except FileNotFoundError:
            return {}
        except (json.JSONDecodeError, UnicodeDecodeError):
            return {}
        return loaded if isinstance(loaded, dict) else {}

    def _save_metadata(self):
        """Write the in-memory metadata index to ``metadata.json``."""
        with self._metadata_lock:
            with open(self._metadata_path(), "w", encoding="utf-8") as f:
                json.dump(self.metadata, f)

    async def create_audio(self, text, voice='en-US-AriaNeural'):
        """Return MP3 bytes speaking *text* with *voice*, using the disk cache.

        Args:
            text: Text to synthesise. Newlines and ``</s>`` markers are
                replaced by spaces before synthesis.
            voice: Edge TTS voice name.

        Returns:
            The MP3 data as ``bytes``, or ``None`` when there is nothing to
            speak (``None``, empty, or whitespace-only text).
        """
        if not text:
            return None

        # The key is derived from the raw (uncleaned) text so that entries
        # written by earlier runs keep matching.
        cache_key = hashlib.md5(f"{text}:{voice}".encode()).hexdigest()
        cache_path = os.path.join(self.cache_dir, f"{cache_key}.mp3")

        if cache_key in self.metadata and os.path.exists(cache_path):
            return self._read_bytes(cache_path)

        # Clean text for speech
        spoken = text.replace("\n", " ").replace("</s>", " ").strip()
        if not spoken:
            return None

        # Generate audio with edge_tts
        communicate = edge_tts.Communicate(spoken, voice)
        await communicate.save(cache_path)

        # Update metadata; mutation and save happen under one lock so a
        # concurrent save never iterates a dict that is being modified.
        with self._metadata_lock:
            self.metadata[cache_key] = {
                'timestamp': datetime.now().isoformat(),
                'text_length': len(spoken),
                'voice': voice,
            }
            self._save_metadata()

        return self._read_bytes(cache_path)
74
+
75
def get_download_link(bin_data, filename, size_mb=None):
    """Build an HTML snippet with a data-URI download link for an MP3.

    Args:
        bin_data: Raw bytes of the file to embed (base64-encoded into the
            ``href``).
        filename: Name offered for the download and shown as the link text;
            HTML-escaped before being inserted into the markup.
        size_mb: Optional size in megabytes shown under the link with one
            decimal. ``None`` omits the label; ``0`` is shown as ``(0.0 MB)``.

    Returns:
        The HTML markup as a string.
    """
    import html  # local import: only this helper needs it

    b64 = base64.b64encode(bin_data).decode()
    safe_name = html.escape(str(filename), quote=True)
    # Compare against None so a genuine size of 0 still gets a label.
    size_str = f"({size_mb:.1f} MB)" if size_mb is not None else ""
    # Lines are kept flush-left: the snippet is rendered through Markdown,
    # where deeply indented lines can be treated as a code block.
    return (
        '\n'
        '<div class="download-container">\n'
        f'<a href="data:audio/mpeg;base64,{b64}"\n'
        f'download="{safe_name}" class="download-link">📥 {safe_name}</a>\n'
        f'<div class="file-info">{size_str}</div>\n'
        '</div>\n'
    )
85
+
86
def process_pdf(pdf_file, max_pages, voice, audio_processor):
    """Extract text from the leading pages of a PDF and start TTS per page.

    Args:
        pdf_file: Anything ``PdfReader`` accepts (here: the uploaded file).
        max_pages: Upper bound on the number of pages to process.
        voice: Edge TTS voice name passed to ``create_audio``.
        audio_processor: Object exposing ``async create_audio(text, voice)``.

    Returns:
        ``(texts, audios, total_pages)``. ``texts`` holds the extracted text
        of each processed page. ``audios`` is filled by background threads
        after this function has returned: key ``i`` appears once page ``i`` is
        finished, mapped to the audio bytes, or to ``None`` when there was
        nothing to speak or synthesis failed.
    """
    import logging  # local import: only the worker's error path needs it

    reader = PdfReader(pdf_file)
    total_pages = max(0, min(len(reader.pages), max_pages))
    texts, audios = [], {}

    def synthesize_page(index, page_text):
        """Thread target: synthesise one page and always publish a result."""
        try:
            audios[index] = asyncio.run(
                audio_processor.create_audio(page_text, voice)
            )
        except Exception:
            # Thread boundary: record the failure and publish None so a caller
            # polling for this index is not left waiting forever.
            logging.getLogger(__name__).exception(
                "Audio synthesis failed for page %d", index + 1
            )
            audios[index] = None

    # Extract text and start audio processing
    for index in range(total_pages):
        # extract_text() may yield None for pages without a text layer.
        page_text = reader.pages[index].extract_text() or ""
        texts.append(page_text)
        # Pass index/text as thread arguments: a closure over the loop
        # variables would see whatever values they hold when the thread
        # actually runs, not the ones from this iteration.
        threading.Thread(
            target=synthesize_page, args=(index, page_text)
        ).start()

    return texts, audios, total_pages
105
+
106
def main():
    """Render the Streamlit page: PDF-to-audio, free-text-to-audio, cache UI.

    The page offers a voice selector in the sidebar, converts the first pages
    of an uploaded PDF to speech page by page (with a player in the main area
    and a download link in the sidebar), converts text typed into the text
    area, and provides a button that empties the audio cache.
    """
    # Longest time to wait for one page's background synthesis before moving
    # on, so a stalled worker cannot hang the page forever.
    page_audio_timeout_s = 120

    st.set_page_config(page_title="📚 PDF to Audio 🎧", page_icon="🎉", layout="wide")

    # Apply styling
    st.markdown("""
<style>
.download-link {
    color: #1E90FF;
    text-decoration: none;
    padding: 8px 12px;
    margin: 5px;
    border: 1px solid #1E90FF;
    border-radius: 5px;
    display: inline-block;
    transition: all 0.3s ease;
}
.download-link:hover {
    background-color: #1E90FF;
    color: white;
}
.file-info {
    font-size: 0.8em;
    color: gray;
    margin-top: 4px;
}
</style>
""", unsafe_allow_html=True)

    # Initialize processor
    audio_processor = AudioProcessor()

    # Sidebar settings
    st.sidebar.title("📥 Downloads & Settings")

    # Voice selection UI
    st.sidebar.markdown("### 🎤 Voice Settings")
    selected_voice = st.sidebar.selectbox(
        "👄 Select TTS Voice:",
        options=EDGE_TTS_VOICES,
        index=EDGE_TTS_VOICES.index(st.session_state['tts_voice'])
    )
    # Kept flush-left so Markdown does not render the list as a code block.
    st.sidebar.markdown("""
# 🎙️ Voice Character Agent Selector 🎭
*Female Voices*:
- 🌸 **Aria** – Elegant, creative storytelling
- 🎶 **Jenny** – Friendly, conversational
- 🌺 **Sonia** – Bold, confident
- 🌌 **Natasha** – Sophisticated, mysterious
- 🌷 **Clara** – Cheerful, empathetic

*Male Voices*:
- 🌟 **Guy** – Authoritative, versatile
- 🛠️ **Ryan** – Approachable, casual
- 🎻 **William** – Classic, scholarly
- 🌟 **Liam** – Energetic, engaging
""")

    # Persist a changed selection and rerun so the whole page uses it.
    if selected_voice != st.session_state['tts_voice']:
        st.session_state['tts_voice'] = selected_voice
        st.rerun()

    # Main interface
    st.markdown("<h1>📚 PDF to Audio Converter 🎧</h1>", unsafe_allow_html=True)

    col1, col2 = st.columns(2)
    with col1:
        uploaded_file = st.file_uploader("Choose a PDF file", "pdf")
    with col2:
        max_pages = st.slider('Select pages to process', min_value=1, max_value=100, value=10)

    if uploaded_file:
        progress_bar = st.progress(0)
        status = st.empty()

        with st.spinner('Processing PDF...'):
            texts, audios, total_pages = process_pdf(
                uploaded_file, max_pages, st.session_state['tts_voice'], audio_processor
            )

            for i, text in enumerate(texts):
                with st.expander(f"Page {i+1}", expanded=i == 0):
                    st.markdown(text or "")

                    # Wait for the background worker to publish this page,
                    # but never longer than the timeout.
                    deadline = time.monotonic() + page_audio_timeout_s
                    while i not in audios and time.monotonic() < deadline:
                        time.sleep(0.1)

                    audio = audios.get(i)
                    if audio:
                        st.audio(audio, format='audio/mp3')

                        # Add download link
                        size_mb = len(audio) / (1024 * 1024)
                        st.sidebar.markdown(
                            get_download_link(audio, f'page_{i+1}.mp3', size_mb),
                            unsafe_allow_html=True
                        )
                    elif i not in audios:
                        st.warning(f"Audio for page {i+1} is not ready yet.")

                progress_bar.progress((i + 1) / total_pages)
                status.text(f"Processing page {i+1}/{total_pages}")

        st.success(f"✅ Successfully processed {total_pages} pages!")

    # Text to Audio section
    st.markdown("### ✍️ Text to Audio")
    prompt = st.text_area("Enter text to convert to audio", height=200)

    if prompt:
        with st.spinner('Converting text to audio...'):
            audio_data = asyncio.run(
                audio_processor.create_audio(prompt, st.session_state['tts_voice'])
            )
            if audio_data:
                st.audio(audio_data, format='audio/mp3')

                size_mb = len(audio_data) / (1024 * 1024)
                st.sidebar.markdown("### 🎵 Custom Audio")
                st.sidebar.markdown(
                    get_download_link(audio_data, 'custom_text.mp3', size_mb),
                    unsafe_allow_html=True
                )

    # Cache management
    if st.sidebar.button("Clear Cache"):
        for entry in os.listdir(audio_processor.cache_dir):
            entry_path = os.path.join(audio_processor.cache_dir, entry)
            # os.remove() fails on directories; only delete regular files.
            if os.path.isfile(entry_path):
                os.remove(entry_path)
        audio_processor.metadata = {}
        audio_processor._save_metadata()
        st.sidebar.success("Cache cleared successfully!")
230
+
231
# Start the UI only when this file is executed directly, not when imported.
if __name__ == "__main__":
    main()