# generate_transcript.py
import pickle
import warnings

import torch
import transformers
from accelerate import Accelerator
from tqdm import tqdm
import spaces

warnings.filterwarnings('ignore')


class TranscriptProcessor:
    """
    A class to generate and rewrite podcast-style transcripts using a
    specified language model.
    """

    def __init__(self, text_file_path, model_name="meta-llama/Llama-3.1-8B-Instruct"):
        """
        Initialize with the path to the cleaned text file and the model name.

        Args:
            text_file_path (str): Path to the file containing cleaned PDF text.
            model_name (str): Name of the language model to use.
        """
        self.text_file_path = text_file_path
        self.transcript_output_path = './resources/data.pkl'
        self.tts_output_path = './resources/podcast_ready_data.pkl'
        self.model_name = model_name
        self.accelerator = Accelerator()
        self.model = transformers.pipeline(
            "text-generation",
            model=self.model_name,
            model_kwargs={"torch_dtype": torch.bfloat16},
            device_map="auto"
        )

        self.transcript_prompt = """
        You are a world-class podcast writer, working as a ghost writer for top podcast hosts.
        You will write the dialogue with engaging interruptions, anecdotes, and curiosity-led questions.
        Speaker 1: Leads the conversation.
        Speaker 2: Asks follow-up questions and reacts with expressions.
        ALWAYS START WITH SPEAKER 1: STRICTLY THE DIALOGUES.
        """

        self.rewrite_prompt = """
        You are an international Oscar-winning screenwriter creating a refined script for TTS.
        Speaker 1: Teaches with anecdotes; Speaker 2: Reacts with expressions like "umm," "hmm," [sigh].
        Return the response as a list of tuples only, with no extra formatting.
        """

    def load_text(self):
        """
        Read the cleaned text file and return its content.

        Returns:
            str: Content of the cleaned text file, or None if it could not be read.
        """
        encodings = ['utf-8', 'latin-1', 'cp1252', 'iso-8859-1']
        for encoding in encodings:
            try:
                with open(self.text_file_path, 'r', encoding=encoding) as file:
                    content = file.read()
                print(f"Successfully read file using {encoding} encoding.")
                return content
            except (UnicodeDecodeError, FileNotFoundError):
                continue
        print(f"Error: Could not decode file '{self.text_file_path}' with any common encoding.")
        return None

    def generate_transcript(self):
        """
        Generate a podcast-style transcript and save it as a pickled file.

        Returns:
            str: Path to the file where the transcript is saved, or None on failure.
        """
        input_text = self.load_text()
        if input_text is None:
            return None

        messages = [
            {"role": "system", "content": self.transcript_prompt},
            {"role": "user", "content": input_text}
        ]
        output = self.model(
            messages,
            max_new_tokens=8126,
            temperature=1
        )
        # The chat pipeline returns the full conversation; keep only the
        # assistant's reply (the generated transcript string).
        transcript = output[0]["generated_text"][-1]["content"]

        # Save the transcript as a pickle file
        with open(self.transcript_output_path, 'wb') as f:
            pickle.dump(transcript, f)

        return self.transcript_output_path

    def rewrite_transcript(self):
        """
        Refine the transcript for TTS, adding expressive elements and saving
        it as a list of tuples.

        Returns:
            str: Path to the file where the TTS-ready transcript is saved.
        """
        # Load the initially generated transcript
        with open(self.transcript_output_path, 'rb') as file:
            input_transcript = pickle.load(file)

        messages = [
            {"role": "system", "content": self.rewrite_prompt},
            {"role": "user", "content": input_transcript}
        ]
        output = self.model(
            messages,
            max_new_tokens=8126,
            temperature=1
        )
        # Again, keep only the assistant's reply from the returned conversation.
        rewritten_transcript = output[0]["generated_text"][-1]["content"]

        # Save the rewritten transcript as a pickle file
        with open(self.tts_output_path, 'wb') as f:
            pickle.dump(rewritten_transcript, f)

        return self.tts_output_path
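

# Minimal usage sketch (illustrative, not part of the original module):
# the input path below is a hypothetical location for the cleaned PDF text;
# point it at whatever file your extraction step produces.
if __name__ == "__main__":
    processor = TranscriptProcessor(text_file_path="./resources/clean_extracted_text.txt")
    transcript_path = processor.generate_transcript()
    if transcript_path is not None:
        tts_ready_path = processor.rewrite_transcript()
        print(f"TTS-ready transcript saved to {tts_ready_path}")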