Spaces:
Running
Running
# generate_transcript.py | |
import pickle | |
from tqdm import tqdm | |
import warnings | |
from groq import Groq | |
import os | |
warnings.filterwarnings('ignore') | |
class TranscriptProcessor:
    """
    Generate and rewrite podcast-style transcripts using a Groq-hosted language model.

    The workflow has two stages, each persisted as a pickle file:

    1. ``generate_transcript`` reads the cleaned source text, asks the model for
       a two-speaker dialogue and pickles the reply to ``transcript_output_path``.
    2. ``rewrite_transcript`` loads that pickle, asks the model to refine it for
       text-to-speech and pickles the reply to ``tts_output_path``.

    The Groq API key is read from the ``GROQ_API_KEY`` environment variable at
    the time each request is made.
    """

    def __init__(self, text_file_path, transcript_output_path, tts_output_path, model_name="llama3-70b-8192"):
        """
        Store the file locations, the model name and the two system prompts.

        Args:
            text_file_path (str): Path to the file containing cleaned PDF text.
            transcript_output_path (str): Path where the first-pass transcript
                is pickled by ``generate_transcript``.
            tts_output_path (str): Path where the TTS-ready transcript is
                pickled by ``rewrite_transcript``.
            model_name (str): Name of the language model to use.
        """
        self.text_file_path = text_file_path
        self.transcript_output_path = transcript_output_path
        self.tts_output_path = tts_output_path
        self.model_name = model_name
        # System prompt for the first pass: raw text -> two-speaker dialogue.
        self.transcript_prompt = """
        You are a world-class podcast writer, working as a ghost writer for top podcast hosts.
        You will write the dialogue with engaging interruptions, anecdotes, and curiosity-led questions.
        Speaker 1: Leads the conversation. Speaker 2: Asks follow-up questions and reacts with expressions.
        ALWAYS START WITH SPEAKER 1: STRICTLY THE DIALOGUES.
        """
        # System prompt for the second pass: dialogue -> TTS-oriented script.
        self.rewrite_prompt = """
        You are an international oscar-winning screenwriter creating a refined script for TTS.
        Speaker 1: Teaches with anecdotes; Speaker 2: Reacts with expressions like "umm," "hmm," [sigh].
        Return the response as a list of tuples only, with no extra formatting.
        """

    def load_text(self):
        """
        Read the cleaned text file and return its content.

        The file is decoded as UTF-8 first. If that fails, cp1252 is tried, and
        finally latin-1, which accepts any byte sequence.

        Returns:
            str | None: Content of the cleaned text file, or None when the file
            does not exist or could not be decoded.
        """
        # Order matters: latin-1 maps every possible byte and therefore never
        # raises, so it must come last or the stricter cp1252 fallback would
        # never be reached.
        encodings = ['utf-8', 'cp1252', 'latin-1']
        for encoding in encodings:
            try:
                with open(self.text_file_path, 'r', encoding=encoding) as file:
                    content = file.read()
            except UnicodeDecodeError:
                # Wrong guess for this file; try the next candidate.
                continue
            except FileNotFoundError:
                # A missing file will not appear by switching encodings, so
                # report the real cause once instead of retrying.
                print(f"Error: File '{self.text_file_path}' not found.")
                return None
            print(f"Successfully read file using {encoding} encoding.")
            return content
        # Safety net only: unreachable while latin-1 remains in the list.
        print(f"Error: Could not decode file '{self.text_file_path}' with any common encoding.")
        return None

    def _complete(self, system_prompt, user_content):
        """Send one system + user exchange to the model and return the reply text."""
        client = Groq(
            api_key=os.environ.get("GROQ_API_KEY"),
        )
        chat_completion = client.chat.completions.create(
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_content},
            ],
            model=self.model_name,
        )
        return chat_completion.choices[0].message.content

    def generate_transcript(self):
        """
        Generate a podcast-style transcript and save it as a pickled file.

        Returns:
            str | None: Path to the file where the transcript is saved, or None
            when the input text could not be loaded (no request is made then).
        """
        input_text = self.load_text()
        if input_text is None:
            return None
        transcript = self._complete(self.transcript_prompt, input_text)
        # Save the transcript as a pickle file
        with open(self.transcript_output_path, 'wb') as f:
            pickle.dump(transcript, f)
        return self.transcript_output_path

    def rewrite_transcript(self):
        """
        Refine the generated transcript for TTS and save the result as a pickled file.

        The model is instructed to answer with a list of tuples; its reply is
        stored exactly as returned, without being parsed here.

        Returns:
            str: Path to the file where the TTS-ready transcript is saved.

        Raises:
            FileNotFoundError: If ``transcript_output_path`` does not exist,
                e.g. because ``generate_transcript`` has not been run yet.
        """
        # Load the initial generated transcript.
        # NOTE(review): pickle.load can execute arbitrary code from a crafted
        # file; only point transcript_output_path at files this class wrote.
        with open(self.transcript_output_path, 'rb') as file:
            input_transcript = pickle.load(file)
        rewritten_transcript = self._complete(self.rewrite_prompt, input_transcript)
        # Save the rewritten transcript as a pickle file
        with open(self.tts_output_path, 'wb') as f:
            pickle.dump(rewritten_transcript, f)
        return self.tts_output_path