Spaces:
Running
Running
File size: 4,493 Bytes
029a66e 5e82a0a 029a66e 59e6fd7 029a66e 59e6fd7 029a66e 4b94c2c 029a66e 4b94c2c 029a66e 4b94c2c 59e6fd7 029a66e 59e6fd7 029a66e 59e6fd7 029a66e 59e6fd7 029a66e 59e6fd7 029a66e 5e82a0a 029a66e 5e82a0a 029a66e 5e82a0a 029a66e 59e6fd7 029a66e 59e6fd7 5e82a0a 59e6fd7 5e82a0a 59e6fd7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 |
# generate_transcript.py
import pickle
from tqdm import tqdm
import warnings
from groq import Groq
import os
# Suppress every warning category process-wide; this takes effect as soon as
# the module is imported, for all code running in the same interpreter.
warnings.filterwarnings('ignore')
class TranscriptProcessor:
    """
    Generate and rewrite podcast-style transcripts using a Groq-hosted chat model.

    The work is split into two stages, each of which pickles the model's raw
    text response to disk:

    * ``generate_transcript`` sends the cleaned source text to the model with
      the podcast-writer system prompt.
    * ``rewrite_transcript`` sends that first transcript back to the model
      with the TTS-refinement system prompt.
    """

    # Encodings tried in order by ``load_text``. ``latin-1`` must come last:
    # it maps every possible byte to a character and therefore never raises
    # ``UnicodeDecodeError``, so anything listed after it would be unreachable.
    # ``cp1252`` is tried first because it decodes Windows "smart" punctuation
    # correctly and still fails on the few bytes it leaves undefined.
    _FALLBACK_ENCODINGS = ('utf-8', 'cp1252', 'latin-1')

    def __init__(self, text_file_path, transcript_output_path, tts_output_path, model_name="llama3-70b-8192"):
        """
        Store the file locations, the model name, and the two system prompts.

        Args:
            text_file_path (str): Path to the file containing cleaned PDF text.
            transcript_output_path (str): Path where the first-pass transcript
                is pickled by ``generate_transcript`` and read back by
                ``rewrite_transcript``.
            tts_output_path (str): Path where the rewritten, TTS-oriented
                transcript is pickled by ``rewrite_transcript``.
            model_name (str): Name of the language model to use.
        """
        self.text_file_path = text_file_path
        self.transcript_output_path = transcript_output_path
        self.tts_output_path = tts_output_path
        self.model_name = model_name
        # The prompts are assembled from per-line literals so that source
        # indentation never leaks into the text sent to the model.
        self.transcript_prompt = (
            "\n"
            "You are a world-class podcast writer, working as a ghost writer for top podcast hosts.\n"
            "You will write the dialogue with engaging interruptions, anecdotes, and curiosity-led questions.\n"
            "Speaker 1: Leads the conversation. Speaker 2: Asks follow-up questions and reacts with expressions.\n"
            "ALWAYS START WITH SPEAKER 1: STRICTLY THE DIALOGUES.\n"
        )
        self.rewrite_prompt = (
            "\n"
            "You are an international oscar-winning screenwriter creating a refined script for TTS.\n"
            'Speaker 1: Teaches with anecdotes; Speaker 2: Reacts with expressions like "umm," "hmm," [sigh].\n'
            "Return the response as a list of tuples only, with no extra formatting.\n"
        )

    def load_text(self):
        """
        Read the cleaned text file, trying several encodings in turn.

        Returns:
            str | None: The file content, or ``None`` when the file does not
            exist or could not be decoded with any of the fallback encodings.
        """
        for encoding in self._FALLBACK_ENCODINGS:
            try:
                with open(self.text_file_path, 'r', encoding=encoding) as file:
                    content = file.read()
            except UnicodeDecodeError:
                # Wrong guess for this file; move on to the next encoding.
                continue
            except FileNotFoundError:
                # A missing file will not appear by switching encodings, so
                # stop immediately and report the real cause.
                print(f"Error: File '{self.text_file_path}' not found.")
                return None
            print(f"Successfully read file using {encoding} encoding.")
            return content
        print(f"Error: Could not decode file '{self.text_file_path}' with any common encoding.")
        return None

    def _chat(self, system_prompt, user_content):
        """
        Send one system+user exchange to the Groq chat API and return the reply text.

        The API key is read from the ``GROQ_API_KEY`` environment variable.

        Args:
            system_prompt (str): Content of the ``system`` message.
            user_content (str): Content of the ``user`` message.

        Returns:
            The ``content`` of the first choice's message in the response.
        """
        client = Groq(
            api_key=os.environ.get("GROQ_API_KEY"),
        )
        chat_completion = client.chat.completions.create(
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_content},
            ],
            model=self.model_name,
        )
        return chat_completion.choices[0].message.content

    def generate_transcript(self):
        """
        Generate a podcast-style transcript and save it as a pickled file.

        Returns:
            str | None: Path to the file where the transcript is saved, or
            ``None`` when the input text could not be loaded (in which case
            no API call is made and nothing is written).
        """
        input_text = self.load_text()
        if input_text is None:
            return None
        transcript = self._chat(self.transcript_prompt, input_text)
        # Save the transcript as a pickle file
        with open(self.transcript_output_path, 'wb') as f:
            pickle.dump(transcript, f)
        return self.transcript_output_path

    def rewrite_transcript(self):
        """
        Refine the previously generated transcript for TTS and pickle the result.

        The model is instructed to answer with a list of tuples, but the
        response is stored exactly as returned (no parsing is done here).

        Returns:
            str: Path to the file where the TTS-ready transcript is saved.

        Raises:
            FileNotFoundError: If ``transcript_output_path`` does not exist,
                e.g. because ``generate_transcript`` has not been run.
        """
        # Load the initial generated transcript.
        # SECURITY: pickle.load executes arbitrary code embedded in the file;
        # only point transcript_output_path at files this class wrote itself.
        with open(self.transcript_output_path, 'rb') as file:
            input_transcript = pickle.load(file)
        rewritten_transcript = self._chat(self.rewrite_prompt, input_transcript)
        # Save the rewritten transcript as a pickle file
        with open(self.tts_output_path, 'wb') as f:
            pickle.dump(rewritten_transcript, f)
        return self.tts_output_path
|