yasserrmd committed on
Commit
5671703
·
verified ·
1 Parent(s): 2a219dd

Create generate_audio_edgetts.py

Browse files
Files changed (1) hide show
  1. generate_audio_edgetts.py +91 -0
generate_audio_edgetts.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import spaces
2
+ import pickle
3
+ import numpy as np
4
+ from tqdm import tqdm
5
+ import edge_tts
6
+ import ast
7
+ import asyncio
8
+
9
# NOTE(review): the synthesis below goes through the edge-tts client rather
# than a local model; confirm the GPU decorator on this class is intentional.
@spaces.GPU
class EdgeTTSGenerator:
    """
    A class to generate podcast-style audio from a transcript using edge-tts.
    """

    def __init__(self, transcript_file_path, output_audio_path):
        """
        Initialize the TTS generator with the path to the rewritten transcript file.

        Args:
            transcript_file_path (str): Path to the file containing the rewritten transcript.
            output_audio_path (str): Path to save the generated audio file.
        """
        self.transcript_file_path = transcript_file_path
        self.output_audio_path = output_audio_path

        # edge-tts voice names used for the two speakers
        self.speaker1_voice = "en-US-AriaNeural"
        self.speaker2_voice = "en-US-GuyNeural"

    def load_transcript(self):
        """
        Loads the rewritten transcript from the specified file.

        The file is a pickle whose payload is either the string form of a
        Python literal (parsed with ``ast.literal_eval``) or the already
        parsed object, which is returned unchanged.

        Returns:
            list: The content of the transcript as a list of tuples (speaker, text).
        """
        # SECURITY: pickle.load can execute arbitrary code while unpickling.
        # Only open transcript files produced by a trusted pipeline step.
        with open(self.transcript_file_path, 'rb') as f:
            payload = pickle.load(f)

        # literal_eval only accepts source text; pass parsed data through.
        if isinstance(payload, str):
            return ast.literal_eval(payload)
        return payload

    async def generate_audio_segment(self, text, voice_name):
        """
        Generate audio for a given text using edge-tts.

        Args:
            text (str): Text to be synthesized.
            voice_name (str): The voice name to use for TTS.

        Returns:
            bytes: Encoded audio data for the synthesized text.
        """
        # The Communicate keyword for the voice is ``voice``.
        communicator = edge_tts.Communicate(text, voice=voice_name)
        audio_parts = []
        async for chunk in communicator.stream():
            # stream() yields dict chunks; only "audio" chunks carry bytes
            # (under "data"), the others are metadata events.
            if chunk["type"] == "audio":
                audio_parts.append(chunk["data"])
        return b"".join(audio_parts)

    def save_audio(self, audio_data):
        """
        Save the combined audio data to an output file.

        Args:
            audio_data (list): List of bytes containing the audio data for each segment.
        """
        combined_audio = b"".join(audio_data)
        with open(self.output_audio_path, "wb") as f:
            f.write(combined_audio)

    async def generate_audio(self):
        """
        Converts the transcript into audio and saves it to a file.

        Segments are synthesized one after another, in transcript order, and
        written out as a single concatenated byte stream.

        Returns:
            str: Path to the saved audio file.
        """
        transcript = self.load_transcript()
        audio_data = []

        for speaker, text in tqdm(transcript, desc="Generating podcast segments", unit="segment"):
            # Any label other than "Speaker 1" falls back to the second voice.
            voice = self.speaker1_voice if speaker == "Speaker 1" else self.speaker2_voice
            segment_audio = await self.generate_audio_segment(text, voice)
            audio_data.append(segment_audio)

        self.save_audio(audio_data)
        return self.output_audio_path
84
+
85
+ # Run the audio generation asynchronously
86
async def main():
    """Build an EdgeTTSGenerator for the example paths and generate the audio file."""
    # The class defined in this file is EdgeTTSGenerator, not TTSGenerator.
    generator = EdgeTTSGenerator("path/to/transcript.pkl", "output_audio.mp3")
    await generator.generate_audio()
89
+
90
# Run the main function only when this file is executed as a script.
# A bare top-level ``await`` is a SyntaxError in a regular module, so the
# coroutine is driven with asyncio.run instead.
if __name__ == "__main__":
    asyncio.run(main())