Spaces:

yasserrmd
/

NotebookLlama

Running

App Files Files Community

yasserrmd committed on Oct 31, 2024

Commit

5671703

verified ·

1 Parent(s): 2a219dd

Create generate_audio_edgetts.py

Browse files

Files changed (1) hide show

generate_audio_edgetts.py +91 -0

generate_audio_edgetts.py ADDED Viewed

	@@ -0,0 +1,91 @@

+import spaces
+import pickle
+import numpy as np
+from tqdm import tqdm
+import edge_tts
+import ast
+import asyncio
+@spaces.GPU
+class EdgeTTSGenerator:
+    """
+    A class to generate podcast-style audio from a transcript using edge-tts.
+    """
+    def __init__(self, transcript_file_path, output_audio_path):
+        """
+        Initialize the TTS generator with the path to the rewritten transcript file.
+        Args:
+            transcript_file_path (str): Path to the file containing the rewritten transcript.
+            output_audio_path (str): Path to save the generated audio file.
+        """
+        self.transcript_file_path = transcript_file_path
+        self.output_audio_path = output_audio_path
+        # Speaker descriptions for edge-tts voices
+        self.speaker1_voice = "en-US-AriaNeural"
+        self.speaker2_voice = "en-US-GuyNeural"
+    def load_transcript(self):
+        """
+        Loads the rewritten transcript from the specified file.
+        Returns:
+            list: The content of the transcript as a list of tuples (speaker, text).
+        """
+        with open(self.transcript_file_path, 'rb') as f:
+            return ast.literal_eval(pickle.load(f))
+    async def generate_audio_segment(self, text, voice_name):
+        """
+        Generate audio for a given text using edge-tts.
+        Args:
+            text (str): Text to be synthesized.
+            voice_name (str): The voice name to use for TTS.
+        Returns:
+            AudioSegment: Generated audio segment.
+        """
+        communicator = edge_tts.Communicate(text, voice_name=voice_name)
+        audio_bytes = b""
+        async for chunk in communicator.stream():
+            audio_bytes += chunk
+        return audio_bytes
+    def save_audio(self, audio_data):
+        """
+        Save the combined audio data to an output file.
+        Args:
+            audio_data (list): List of bytes containing the audio data for each segment.
+        """
+        combined_audio = b"".join(audio_data)
+        with open(self.output_audio_path, "wb") as f:
+            f.write(combined_audio)
+    async def generate_audio(self):
+        """
+        Converts the transcript into audio and saves it to a file.
+        Returns:
+            str: Path to the saved audio file.
+        """
+        transcript = self.load_transcript()
+        audio_data = []
+        for speaker, text in tqdm(transcript, desc="Generating podcast segments", unit="segment"):
+            voice = self.speaker1_voice if speaker == "Speaker 1" else self.speaker2_voice
+            segment_audio = await self.generate_audio_segment(text, voice)
+            audio_data.append(segment_audio)
+        self.save_audio(audio_data)
+        return self.output_audio_path
+# Run the audio generation asynchronously
+async def main():
+    generator = TTSGenerator("path/to/transcript.pkl", "output_audio.mp3")
+    await generator.generate_audio()
+# Run the main function
+await main()