File size: 4,493 Bytes
029a66e
 
 
 
 
5e82a0a
 
029a66e
 
 
 
59e6fd7
029a66e
59e6fd7
029a66e
 
4b94c2c
029a66e
 
 
 
 
 
 
 
4b94c2c
 
029a66e
4b94c2c
 
59e6fd7
 
 
 
 
 
 
 
 
 
 
029a66e
59e6fd7
029a66e
59e6fd7
029a66e
59e6fd7
029a66e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59e6fd7
029a66e
 
 
5e82a0a
 
 
 
 
 
 
 
 
029a66e
5e82a0a
029a66e
5e82a0a
029a66e
 
59e6fd7
029a66e
 
59e6fd7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5e82a0a
 
 
 
 
 
 
59e6fd7
 
5e82a0a
59e6fd7
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
# generate_transcript.py

import pickle
from tqdm import tqdm
import warnings
from groq import Groq
import os

# Install a catch-all "ignore" filter at import time: no category or module is
# given, so this silences every warning in the process, not just this module's.
warnings.filterwarnings('ignore')


class TranscriptProcessor:
    """
    Generate and rewrite podcast-style transcripts using a chat language model.

    The workflow is two-step: `generate_transcript` turns the cleaned source
    text into a first-draft dialogue, and `rewrite_transcript` asks the model
    to refine that draft for text-to-speech. Each step pickles the model's
    raw text response to the corresponding output path.
    """

    def __init__(self, text_file_path,transcript_output_path,tts_output_path, model_name="llama3-70b-8192"):
        """
        Store the input/output paths, the model name and the two system prompts.

        Args:
            text_file_path (str): Path to the file containing cleaned PDF text.
            transcript_output_path (str): Path where the first-draft transcript
                is pickled by `generate_transcript` and read back by
                `rewrite_transcript`.
            tts_output_path (str): Path where the rewritten, TTS-oriented
                transcript is pickled by `rewrite_transcript`.
            model_name (str): Name of the language model to use.
        """
        self.text_file_path = text_file_path
        self.transcript_output_path = transcript_output_path
        self.tts_output_path = tts_output_path
        self.model_name = model_name

        # System prompt for the first pass (source text -> draft dialogue).
        self.transcript_prompt = """
        You are a world-class podcast writer, working as a ghost writer for top podcast hosts.
        You will write the dialogue with engaging interruptions, anecdotes, and curiosity-led questions.
        
        Speaker 1: Leads the conversation. Speaker 2: Asks follow-up questions and reacts with expressions.
        
        ALWAYS START WITH SPEAKER 1: STRICTLY THE DIALOGUES.
        """

        # System prompt for the second pass (draft dialogue -> TTS script).
        self.rewrite_prompt = """
        You are an international oscar-winning screenwriter creating a refined script for TTS.
        
        Speaker 1: Teaches with anecdotes; Speaker 2: Reacts with expressions like "umm," "hmm," [sigh].
        
        Return the response as a list of tuples only, with no extra formatting.
        """

    def load_text(self):
        """
        Read the cleaned text file, trying several encodings in turn.

        Encodings are tried from strictest to most permissive: UTF-8, then
        cp1252, then latin-1. latin-1 maps every byte to a character and
        therefore never fails, so it must come last; placing it earlier makes
        any later entry unreachable.

        Returns:
            str | None: The file content, or None if the file does not exist
            or could not be decoded. A message is printed in every case.
        """
        encodings = ('utf-8', 'cp1252', 'latin-1')
        for encoding in encodings:
            try:
                with open(self.text_file_path, 'r', encoding=encoding) as file:
                    content = file.read()
            except UnicodeDecodeError:
                # Wrong guess for this file; fall through to the next encoding.
                continue
            except FileNotFoundError:
                # Retrying with another encoding cannot help, and reporting a
                # decode failure for a missing file would be misleading.
                print(f"Error: File '{self.text_file_path}' not found.")
                return None
            print(f"Successfully read file using {encoding} encoding.")
            return content
        # Safety net: not reachable while latin-1 is in the list above, but
        # keeps the method well-behaved if the encoding list is ever changed.
        print(f"Error: Could not decode file '{self.text_file_path}' with any common encoding.")
        return None

    def _chat(self, system_prompt, user_content):
        """
        Run one system+user exchange against the model and return the reply text.

        Args:
            system_prompt (str): Content of the "system" message.
            user_content (str): Content of the "user" message.

        Returns:
            The `content` of the first choice in the completion response.
        """
        # The API key is read from the GROQ_API_KEY environment variable.
        client = Groq(
            api_key=os.environ.get("GROQ_API_KEY"),
        )
        chat_completion = client.chat.completions.create(
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_content},
            ],
            model=self.model_name,
        )
        return chat_completion.choices[0].message.content

    def generate_transcript(self):
        """
        Generate a podcast-style transcript and save it as a pickled file.

        Returns:
            str | None: Path to the file where the transcript is saved, or
            None if the input text could not be loaded (no request is made
            in that case).
        """
        input_text = self.load_text()
        if input_text is None:
            return None

        transcript = self._chat(self.transcript_prompt, input_text)

        # Save the transcript as a pickle file
        with open(self.transcript_output_path, 'wb') as f:
            pickle.dump(transcript, f)

        return self.transcript_output_path

    def rewrite_transcript(self):
        """
        Refine the previously generated transcript for TTS and pickle the result.

        The rewrite prompt asks the model to answer with a list of tuples, but
        the response is stored exactly as returned (the raw message content);
        no parsing or validation happens here.

        Returns:
            str: Path to the file where the TTS-oriented transcript is saved.

        Raises:
            FileNotFoundError: If `transcript_output_path` does not exist,
                e.g. `generate_transcript` has not been run successfully.
        """
        # Load the initial generated transcript.
        # NOTE(security): unpickling can execute arbitrary code. This path is
        # expected to hold the file written by generate_transcript; do not
        # point it at files from untrusted sources.
        with open(self.transcript_output_path, 'rb') as file:
            input_transcript = pickle.load(file)

        rewritten_transcript = self._chat(self.rewrite_prompt, input_transcript)

        # Save the rewritten transcript as a pickle file
        with open(self.tts_output_path, 'wb') as f:
            pickle.dump(rewritten_transcript, f)

        return self.tts_output_path