PierreBrunelle committed
Commit f7689c9 · verified · 1 Parent(s): b24fd86

Create app.py

Files changed (1)
app.py +258 -0
app.py ADDED
@@ -0,0 +1,258 @@
+ import gradio as gr
+ import pixeltable as pxt
+ from pixeltable.iterators import FrameIterator, StringSplitter
+ from pixeltable.functions.video import extract_audio
+ from pixeltable.functions.audio import get_metadata
+ from pixeltable.functions import openai
+ import os
+ import getpass
+ import numpy as np
+ from pixeltable.functions.huggingface import sentence_transformer
+
+ # Store OpenAI API Key
+ if 'OPENAI_API_KEY' not in os.environ:
+     os.environ['OPENAI_API_KEY'] = getpass.getpass('Enter your OpenAI API key:')
+
+ MAX_VIDEO_SIZE_MB = 35
+
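+ # Pipeline overview: each uploaded call video is stored in a Pixeltable table;
+ # computed columns extract the audio, transcribe it with Whisper, and generate
+ # insights with GPT, while a sentence-level view with an embedding index
+ # powers the similarity search and chatbot defined below.
+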
+ def process_video(video_file, progress=gr.Progress()):
+     progress(0, desc="Initializing...")
+
+     # Validate the input before rebuilding any tables
+     if not video_file:
+         return "", "", "Please upload a video file."
+
+     # Check video file size
+     video_size = os.path.getsize(video_file) / (1024 * 1024)  # Convert to MB
+     if video_size > MAX_VIDEO_SIZE_MB:
+         return "", "", f"The video file is larger than {MAX_VIDEO_SIZE_MB} MB. Please upload a smaller file."
+
+     try:
+         # Create a Table, a View, and Computed Columns
+         pxt.drop_dir('gong_demo', force=True)
+         pxt.create_dir('gong_demo')
+
+         calls_table = pxt.create_table(
+             'gong_demo.calls', {
+                 "video": pxt.VideoType(nullable=True),
+             }
+         )
+
+         # Extract frames at 1 fps (available for frame-level analysis)
+         frames_view = pxt.create_view(
+             "gong_demo.frames",
+             calls_table,
+             iterator=FrameIterator.create(video=calls_table.video, fps=1)
+         )
+
+         # Create computed columns to store transformations and persist outputs
+         calls_table['audio'] = extract_audio(calls_table.video, format='mp3')
+         calls_table['metadata'] = get_metadata(calls_table.audio)
+         calls_table['transcription'] = openai.transcriptions(audio=calls_table.audio, model='whisper-1')
+         calls_table['transcription_text'] = calls_table.transcription.text.astype(pxt.StringType())
+
+         # Split the transcript into sentences for fine-grained search
+         sentences_view = pxt.create_view(
+             'gong_demo.sentences',
+             calls_table,
+             iterator=StringSplitter.create(
+                 text=calls_table.transcription_text,
+                 separators='sentence'
+             )
+         )
+
+         @pxt.expr_udf
+         def e5_embed(text: str) -> np.ndarray:
+             return sentence_transformer(text, model_id='intfloat/e5-large-v2')
+
+         sentences_view.add_embedding_index('text', string_embed=e5_embed)
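+         # Note: this embedding index backs the .similarity() calls used by the
+         # search and chatbot functions below; the E5 model weights are
+         # downloaded from Hugging Face on first use.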
+
+         progress(0.2, desc="Creating UDFs...")
+
+         # Custom User-Defined Function (UDF) for Generating Insights
+         @pxt.udf
+         def generate_insights(transcription: str) -> list[dict]:
+             system_msg = 'You are an AI assistant that analyzes call transcriptions. Analyze the following call transcription and provide insights on: 1. Main topics discussed 2. Action items 3. Sentiment analysis 4. Key questions asked'
+             user_msg = f'Transcription: "{transcription}"'
+             return [
+                 {'role': 'system', 'content': system_msg},
+                 {'role': 'user', 'content': user_msg}
+             ]
+
+         # Apply the UDF to create a new column
+         calls_table['insights_prompt'] = generate_insights(calls_table.transcription_text)
+
+         progress(0.4, desc="Generating insights...")
+
+         # Generate insights using OpenAI's chat completion API
+         calls_table['insights_response'] = openai.chat_completions(messages=calls_table.insights_prompt, model='gpt-3.5-turbo', max_tokens=500)
+
+         # Extract the content of the response
+         calls_table['insights'] = calls_table.insights_response.choices[0].message.content
+
+         progress(0.6, desc="Processing video...")
+
+         # Insert the video into the table; computed columns run automatically
+         calls_table.insert([{"video": video_file}])
+
+         progress(0.8, desc="Retrieving results...")
+
+         # Retrieve transcription and insights
+         result = calls_table.select(calls_table.transcription_text, calls_table.insights).tail(1)
+         transcription = result['transcription_text'][0]
+         insights = result['insights'][0]
+
+         progress(1.0, desc="Processing complete")
+
+         return transcription, insights, "Processing complete"
+
+     except Exception as e:
+         # Return three values to match the three Gradio output components
+         return "", "", f"An error occurred during video processing: {str(e)}"
+
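+ # Note: assumes process_video() has already run, so 'gong_demo.sentences'
+ # and its embedding index exist.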
+ # Perform similarity search
+ def similarity_search(query, num_results, progress=gr.Progress()):
+     sentences_view = pxt.get_table('gong_demo.sentences')
+
+     progress(0.5, desc="Performing search...")
+
+     sim = sentences_view.text.similarity(query)
+     results = sentences_view.order_by(sim, asc=False).limit(num_results).select(sentences_view.text, sim=sim).collect().to_pandas()
+
+     progress(1.0, desc="Search complete")
+     return results
+
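+ # RAG-style chatbot: retrieves the transcript sentences most similar to the
+ # question and asks GPT to answer using only that context.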
+ def chatbot_response(message, chat_history):
+     @pxt.udf
+     def create_chatbot_prompt(context: str, question: str) -> list[dict]:
+         system_message = "You are an AI assistant that answers questions about a call based on the provided context. If the answer cannot be found in the context, say that you don't know."
+         user_message = f"Context:\n{context}\n\nQuestion: {question}"
+         return [
+             {"role": "system", "content": system_message},
+             {"role": "user", "content": user_message}
+         ]
+
+     try:
+         sentences_view = pxt.get_table('gong_demo.sentences')
+
+         # Perform similarity search to get relevant context
+         sim = sentences_view.text.similarity(message)
+         context = sentences_view.order_by(sim, asc=False).limit(5).select(sentences_view.text, sim=sim).collect()
+
+         # Prepare the context for the prompt
+         context_text = "\n".join([row['text'] for row in context])
+
+         # Create a temporary table for the chatbot interaction
+         # (drop any leftover table from a previous failed run first)
+         pxt.drop_table('gong_demo.temp_chatbot', force=True)
+         temp_table = pxt.create_table('gong_demo.temp_chatbot', {'question': pxt.StringType()})
+         temp_table.insert([{'question': message}])
+
+         # Create computed columns for the prompt and response
+         temp_table['chatbot_prompt'] = create_chatbot_prompt(context_text, temp_table.question)
+         temp_table['chatbot_response'] = openai.chat_completions(
+             messages=temp_table.chatbot_prompt,
+             model='gpt-3.5-turbo',
+             max_tokens=150
+         )
+         temp_table['answer'] = temp_table.chatbot_response.choices[0].message.content
+
+         answer = temp_table.select(temp_table.answer).collect()['answer'][0]
+
+         # Clean up the temporary table
+         pxt.drop_table('gong_demo.temp_chatbot', force=True)
+
+         chat_history.append((message, answer))
+         return "", chat_history  # Return both expected outputs
+     except Exception as e:
+         error_message = f"An error occurred: {str(e)}"
+         chat_history.append((message, error_message))
+         return "", chat_history  # Return both expected outputs
+
+ # Gradio interface
+ with gr.Blocks(theme=gr.themes.Base()) as demo:
+     gr.Markdown(
+         """
+         <div style="text-align: left; margin-bottom: 20px;">
+             <img src="https://raw.githubusercontent.com/pixeltable/pixeltable/main/docs/source/data/pixeltable-logo-large.png" alt="Pixeltable" style="max-width: 150px;" />
+             <h1 style="margin-top: 10px;">Call Analysis AI Tool</h1>
+         </div>
+         """
+     )
+     gr.HTML(
+         """
+         <p style="text-align: left;">
+             Powered by <a href="https://github.com/pixeltable/pixeltable" target="_blank" style="color: #F25022; text-decoration: none; font-weight: bold;">Pixeltable</a>
+             - Analyze calls, extract insights, and interact with AI-powered assistance.
+         </p>
+         """
+     )
+
+     with gr.Row():
+         with gr.Column():
+             with gr.Accordion("🎯 What does it do?", open=False):
+                 gr.Markdown("""
+                 - 🎙️ Transcribes call audio to text
+                 - 💡 Generates insights and key points
+                 - 🔍 Enables content-based similarity search
+                 - 🤖 Provides an AI chatbot for in-depth analysis
+                 - 📊 Offers summaries of call data
+                 """)
+         with gr.Column():
+             with gr.Accordion("🛠️ How does it work?", open=False):
+                 gr.Markdown("""
+                 1. 📤 Upload your call recording (video)
+                 2. ⚙️ AI processes and analyzes the content
+                 3. 📝 Review the transcript and generated insights
+                 4. 🔎 Use similarity search to explore specific topics
+                 5. 💬 Interact with the AI chatbot for deeper understanding
+                 """)
+
+     with gr.Row():
+         with gr.Column(scale=1):
+             video_file = gr.Video(
+                 label=f"Upload Call Recording (max {MAX_VIDEO_SIZE_MB} MB)",
+             )
+             process_btn = gr.Button("Analyze Call", variant="primary")
+             status_output = gr.Textbox(label="Status", interactive=False)
+
+         with gr.Column(scale=2):
+             with gr.Tabs() as tabs:
+                 with gr.TabItem("📝 Transcript"):
+                     output_transcription = gr.Textbox(label="Call Transcription", lines=15)
+
+                 with gr.TabItem("💡 Insights"):
+                     output_insights = gr.Textbox(label="Key Takeaways", lines=10)
+
+                 with gr.TabItem("🔍 Similarity Search"):
+                     with gr.Row():
+                         similarity_query = gr.Textbox(label="Search Query", placeholder="Enter a topic or phrase to search for")
+                         num_results = gr.Slider(minimum=1, maximum=20, value=5, step=1, label="Number of Results")
+                     similarity_search_btn = gr.Button("Search", variant="secondary")
+                     similarity_results = gr.DataFrame(
+                         headers=["Relevant Text", "Similarity Score"],
+                         label="Search Results"
+                     )
+
+                 with gr.TabItem("🤖 AI Assistant"):
+                     chatbot = gr.Chatbot(height=400, label="Chat with AI about the call")
+                     with gr.Row():
+                         msg = gr.Textbox(label="Ask a question about the call", placeholder="e.g., What were the main points discussed?", scale=4)
+                         send_btn = gr.Button("Send", variant="secondary", scale=1)
+                     clear = gr.Button("Clear Chat")
+
+     process_btn.click(
+         process_video,
+         inputs=[video_file],
+         outputs=[output_transcription, output_insights, status_output],
+         show_progress="full"
+     )
+
+     similarity_search_btn.click(
+         similarity_search,
+         inputs=[similarity_query, num_results],
+         outputs=[similarity_results]
+     )
+
+     msg.submit(chatbot_response, [msg, chatbot], [msg, chatbot])
+     send_btn.click(chatbot_response, [msg, chatbot], [msg, chatbot])
+     clear.click(lambda: None, None, chatbot, queue=False)
+
+ if __name__ == "__main__":
+     demo.launch(debug=True)