Spaces:

SohomToom
/

DocToAudioConverted

Sleeping

SohomToom committed on May 6, 2025

Commit

a3e2313

verified ·

1 Parent(s): 3a9c6ec

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,30 +1,35 @@
 import gradio as gr
 from docx import Document
 from TTS.api import TTS
 import tempfile
-import os
-os.environ["LIBROSA_CACHE_DIR"] = "/tmp/librosa_cache"
-# Load TTS model once
-#tts = TTS(model_name="tts_models/en/vctk/vits", progress_bar=False, gpu=False)
 tts = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC", progress_bar=False, gpu=False)
-def extract_text(docx_file):
-    doc = Document(docx_file)
-    return "\n".join([para.text for para in doc.paragraphs if para.text.strip()])
-def generate_audio(docx_file):
-    text = extract_text(docx_file.name)
-    tts.tts_to_file(text=text, file_path="output.wav")
-    return "output.wav"
-# Gradio UI
 interface = gr.Interface(
-    fn=generate_audio,
-    inputs=gr.File(file_types=[".docx"], label="Upload your DOCX script"),
-    outputs=gr.Audio(label="Realistic Voiceover", type="filepath"),
-    title="DOCX to Voiceover (Offline, Realistic)",
-    description="Upload a .docx script and get a realistic WAV voiceover using Coqui TTS."
 )
-interface.launch()

+import os
+os.environ["NUMBA_DISABLE_CACHE"] = "1"  # Fix for Numba caching issue in cloud
 import gradio as gr
 from docx import Document
 from TTS.api import TTS
 import tempfile
+# Load Coqui TTS model (offline + realistic)
 tts = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC", progress_bar=False, gpu=False)
+def docx_to_audio(doc_file):  # doc_file: uploaded file object; only its .name path is read
+    # Open the uploaded .docx through the path stored in doc_file.name
+    document = Document(doc_file.name)
+    full_text = "\n".join([para.text for para in document.paragraphs if para.text.strip()])  # whitespace-only paragraphs are skipped
+    # Reserve a .wav path; delete=False keeps the file on disk after the handle is closed
+    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
+        audio_path = f.name
+    # Synthesize the joined text into the reserved path using the module-level tts model
+    tts.tts_to_file(text=full_text, file_path=audio_path)
+    return audio_path  # path of the generated .wav file
+# Gradio interface: a single file upload routed through docx_to_audio to an audio output
 interface = gr.Interface(
+    fn=docx_to_audio,
+    inputs=gr.File(label="Upload .docx File"),
+    outputs=gr.Audio(type="filepath", label="Download Audio"),
+    title="Docx to Realistic Voiceover",
+    description="Upload a .docx file and get realistic speech audio."
 )
+if __name__ == "__main__":  # launch only when executed as a script, not when imported
+    interface.launch()