adiv07 committed
Commit 6a981b2 · verified · 1 Parent(s): 03b1652

Update Gpt4oDemo.py

Files changed (1)
  1. Gpt4oDemo.py +411 -401
Gpt4oDemo.py CHANGED
@@ -1,402 +1,412 @@
- import gradio as gr
- import plotly.graph_objs as go
- import numpy as np
- import time
- from openai import OpenAI
- import os
- from hardCodedData import *
- from Helper import *
- import cv2
- from moviepy.editor import VideoFileClip
- import time
- import base64
- import whisperx
- import gc
- from moviepy.editor import VideoFileClip
- from dotenv import load_dotenv
-
- load_dotenv()
-
- '''
- Model Information
- GPT4o
- '''
-
- import openai
- api_key = os.getenv("OPENAI_API_KEY")
- client = openai.OpenAI(
- api_key=api_key,
- base_url="https://openai.gateway.salt-lab.org/v1",
- )
- MODEL="gpt-4o"
-
- # Whisperx config
- device = "cpu"
- batch_size = 16 # reduce if low on GPU mem
- compute_type = "int8" # change to "int8" if low on GPU mem (may reduce accuracy)
- model = whisperx.load_model("large-v2", device, compute_type=compute_type)
-
- '''
- Video
- '''
- video_file = None
- audio_path=None
- base64Frames = []
- transcript=""
-
- def process_video(video_path, seconds_per_frame=2):
- global base64Frames, audio_path
- base_video_path, _ = os.path.splitext(video_path)
-
- video = cv2.VideoCapture(video_path)
- total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
- fps = video.get(cv2.CAP_PROP_FPS)
- frames_to_skip = int(fps * seconds_per_frame)
- curr_frame=0
-
- while curr_frame < total_frames - 1:
- video.set(cv2.CAP_PROP_POS_FRAMES, curr_frame)
- success, frame = video.read()
- if not success:
- break
- _, buffer = cv2.imencode(".jpg", frame)
- base64Frames.append(base64.b64encode(buffer).decode("utf-8"))
- curr_frame += frames_to_skip
- video.release()
-
- audio_path = "./TEST.mp3"
- clip = VideoFileClip(video_path)
- clip.audio.write_audiofile(audio_path, bitrate="32k")
- clip.audio.close()
- clip.close()
- # transcribe_video(audio_path)
- print(f"Extracted {len(base64Frames)} frames")
- print(f"Extracted audio to {audio_path}")
- return base64Frames, audio_path
-
- chat_history = []
- # chat_history.append({
- # "role": "system",
- # "content": (
- # """
- # You are an assistant chatbot for a Speech Language Pathologist (SLP).
- # Your task is to help analyze a provided video of a therapy session and answer questions accurately.
- # Provide timestamps for specific events or behaviors mentioned. Conclude each response with possible follow-up questions.
-
- # Follow these steps:
-
- # 1. Suggest to the user to ask, “To get started, you can try asking me how many people there are in the video.”
- # 2. Detect how many people are in the video.
- # 2. Suggest to the user to tell you the names of the people in the video, starting from left to right.
- # 3. After receiving the names, respond with, “Ok thank you! Now you can ask me any questions about this video.”
- # 4. If the user asks about a behavior, respond with, “My understanding of this behavior is [xxx - AI generated output]. Is this a behavior that you want to track? If it is, please define this behavior and tell me more about it so I can analyze it more accurately according to your practice.”
- # 5. If you receive names, confirm that these are the names of the people from left to right.
- # """
- # )
- # })
-
- def transcribe_video(filename):
- global transcript
- if not audio_path:
- raise ValueError("Audio path is None")
- print(audio_path)
- audio = whisperx.load_audio(audio_path)
- result = model.transcribe(audio, batch_size=batch_size)
-
- model_a, metadata = whisperx.load_align_model(language_code=result["language"], device=device)
- result = whisperx.align(result["segments"], model_a, metadata, audio, device, return_char_alignments=False)
-
-
- hf_auth_token = os.getenv("HF_AUTH_TOKEN")
- diarize_model = whisperx.DiarizationPipeline(use_auth_token=hf_auth_token, device=device)
-
- diarize_segments = diarize_model(audio)
-
- dia_result = whisperx.assign_word_speakers(diarize_segments, result)
-
- for res in dia_result["segments"]:
- # transcript += "Speaker: " + str(res.get("speaker", None)) + "\n"
- transcript += "Dialogue: " + str(res["text"].lstrip()) + "\n"
- transcript += "start: " + str(int(res["start"])) + "\n"
- transcript += "end: " + str(int(res["end"])) + "\n"
- transcript += "\n"
-
- return transcript
-
-
- def handle_video(video=None):
- global video_file, base64Frames, audio_path, chat_history, transcript
-
- if video is None:
- # Load example video
- video = "./TEST.mp4"
-
- base64Frames, audio_path = process_video(video_path=video, seconds_per_frame=100)
- chat_history.append({
- "role": "user",
- "content": [
- {"type": "text", "text": "These are the frames from the video."},
- *map(lambda x: {"type": "image_url", "image_url": {"url": f'data:image/jpg;base64,{x}', "detail": "low"}}, base64Frames)
- ]
- })
-
- if transcript:
- chat_history[-1]['content'].append({
- "type": "text",
- "text": f"Also, below is the template of transcript from the video:\n"
- "Speaker: <the speaker of the dialogue>\n"
- "Dialogue: <the text of the dialogue>\n"
- "start: <the starting timestamp of the dialogue in the video in second>\n"
- "end: <the ending timestamp of the dialogue in the video in second>\n"
- f"Transcription: {transcript}"
- })
-
- video_file = video
- return video_file
-
- '''
- Chatbot
- '''
-
- def new_prompt(prompt):
- global chat_history, video_file
- chat_history.append({"role": "user","content": prompt,})
- MODEL="gpt-4o"
- # print(chat_history)
- print(transcript)
- try:
- if video_file:
- # Video exists and is processed
- response = client.chat.completions.create(model=MODEL,messages=chat_history,temperature=0,)
- else:
- # No video uploaded yet
- response = client.chat.completions.create(model=MODEL,messages=chat_history,temperature=0,)
-
- # Extract the text content from the response and append it to the chat history
- assistant_message = response.choices[0].message.content
- chat_history.append({'role': 'model', 'content': assistant_message})
- print(assistant_message)
- except Exception as e:
- print("Error: ",e)
- assistant_message = "API rate limit has been reached. Please wait a moment and try again."
- chat_history.append({'role': 'model', 'content': assistant_message})
-
- # except google.api_core.exceptions.ResourceExhausted:
- # assistant_message = "API rate limit has been reached. Please wait a moment and try again."
- # chat_history.append({'role': 'model', 'parts': [assistant_message]})
- # except Exception as e:
- # assistant_message = f"An error occurred: {str(e)}"
- # chat_history.append({'role': 'model', 'parts': [assistant_message]})
-
- return chat_history
-
- def user_input(user_message, history):
- return "", history + [[user_message, None]]
-
- def bot_response(history):
- user_message = history[-1][0]
- updated_history = new_prompt(user_message)
- assistant_message = updated_history[-1]['content']
- history[-1][1] = assistant_message
- yield history
-
-
- '''
- Behaivor box
- '''
- initial_behaviors = [
- ("Initiating Behavioral Request (IBR)",
- ("The child's skill in using behavior(s) to elicit aid in obtaining an object, or object related event",
- ["00:10", "00:45", "01:30"])),
-
- ("Initiating Joint Attention (IJA)",
- ("The child's skill in using behavior(s) to initiate shared attention to objects or events.",
- ["00:15", "00:50", "01:40"])),
-
- ("Responding to Joint Attention (RJA)",
- ("The child's skill in following the examiner’s line of regard and pointing gestures.",
- ["00:20", "01:00", "02:00"])),
-
- ("Initiating Social Interaction (ISI)",
- ("The child's skill at initiating turn-taking sequences and the tendency to tease the tester",
- ["00:20", "00:50", "02:00"])),
-
- ("Responding to Social Interaction (RSI)",
- ("The child’s skill in responding to turn-taking interactions initiated by the examiner.",
- ["00:20", "01:00", "02:00"]))
- ]
-
- behaviors = initial_behaviors
- behavior_bank = []
-
- def add_or_update_behavior(name, definition, timestamps, selected_behavior):
- global behaviors, behavior_bank
- if selected_behavior: # Update existing behavior
- for i, (old_name, _) in enumerate(behaviors):
- if old_name == selected_behavior:
- behaviors[i] = (name, (definition, timestamps))
- break
- # Update behavior in the bank if it exists
- behavior_bank = [name if b == selected_behavior else b for b in behavior_bank]
- else: # Add new behavior
- new_behavior = (name, (definition, timestamps))
- behaviors.append(new_behavior)
- choices = [b[0] for b in behaviors]
- return gr.Dropdown(choices=choices, value=None, interactive=True), gr.CheckboxGroup(choices=behavior_bank, value=behavior_bank, interactive=True), "", "", ""
-
- def add_to_behaivor_bank(selected_behavior, checkbox_group_values):
- global behavior_bank
- if selected_behavior and selected_behavior not in checkbox_group_values:
- checkbox_group_values.append(selected_behavior)
- behavior_bank = checkbox_group_values
- return gr.CheckboxGroup(choices=checkbox_group_values, value=checkbox_group_values, interactive=True), gr.Dropdown(value=None,interactive=True)
-
- def delete_behavior(selected_behavior, checkbox_group_values):
- global behaviors, behavior_bank
- behaviors = [b for b in behaviors if b[0] != selected_behavior]
- behavior_bank = [b for b in behavior_bank if b != selected_behavior]
- updated_choices = [b[0] for b in behaviors]
- updated_checkbox_group = [cb for cb in checkbox_group_values if cb != selected_behavior]
- return gr.Dropdown(choices=updated_choices, value=None, interactive=True), gr.CheckboxGroup(choices=updated_checkbox_group, value=updated_checkbox_group, interactive=True)
-
- def edit_behavior(selected_behavior):
- for name, (definition, timestamps) in behaviors:
- if name == selected_behavior:
- # Return values to populate textboxes
- return name, definition, timestamps
- return "", "", ""
-
-
- welcome_message = """
- Hello! I'm your AI assistant.
- I can help you analyze your video sessions following your instructions.
- To get started, please upload a video or add your behaviors to the Behavior Bank using the Behavior Manager.
- """
- #If you want to tell me about the people in the video, please name them starting from left to right.
-
- css="""
- body {
- background-color: #edf1fa; /* offwhite */
- }
- .gradio-container {
- background-color: #edf1fa; /* offwhite */
- }
- .column-form .wrap {
- flex-direction: column;
- }
- .sidebar {
- background: #ffffff;
- padding: 10px;
- border-right: 1px solid #dee2e6;
- }
- .content {
- padding: 10px;
- }
- """
-
- '''
- Gradio Demo
- '''
- with gr.Blocks(theme='base', css=css, title="Soap.AI") as demo:
- gr.Markdown("# 🤖 AI-Supported SOAP Generation")
-
- with gr.Row():
- with gr.Column():
- video = gr.Video(label="Video", visible=True, height=360, container=True)
- with gr.Row():
- with gr.Column(min_width=1, scale=1):
- video_upload_button = gr.Button("Analyze Video", variant="primary")
- with gr.Column(min_width=1, scale=1):
- example_video_button = gr.Button("Load Example Video")
-
- video_upload_button.click(handle_video, inputs=video, outputs=video)
- example_video_button.click(handle_video, None, outputs=video)
-
- with gr.Column():
- chat_section = gr.Group(visible=True)
- with chat_section:
- chatbot = gr.Chatbot(elem_id="chatbot",
- container=True,
- likeable=True,
- value=[[None, welcome_message]],
- avatar_images=(None, "./avatar.webp"))
- with gr.Row():
- txt = gr.Textbox(show_label=False, placeholder="Type here!")
- with gr.Row():
- send_btn = gr.Button("Send Message", elem_id="send-btn", variant="primary")
- clear_btn = gr.Button("Clear Chat", elem_id="clear-btn")
-
- with gr.Row():
- behaivor_bank = gr.CheckboxGroup(label="Behavior Bank",
- choices=[],
- interactive=True,
- info="A space to store all the behaviors you want to analyze.")
- open_sidebar_btn = gr.Button("Show Behavior Manager", scale=0)
- close_sidebar_btn = gr.Button("Hide Behavior Manager", visible=False, scale=0)
-
- txt.submit(user_input, [txt, chatbot], [txt, chatbot], queue=False).then(
- bot_response, chatbot, chatbot)
- send_btn.click(user_input, [txt, chatbot], [txt, chatbot], queue=False).then(
- bot_response, chatbot, chatbot)
- clear_btn.click(lambda: None, None, chatbot, queue=False)
-
- # Define a sidebar column that is initially hidden
- with gr.Column(visible=False, min_width=200, scale=0.5, elem_classes="sidebar") as sidebar:
- behavior_dropdown = gr.Dropdown(label="Behavior Collection",
- choices=behaviors,
- interactive=True,
- container=True,
- elem_classes="column-form",
- info="Choose a behavior to add to the bank, edit or remove.")
- with gr.Row():
- add_toBank_button = gr.Button("Add Behavior to Bank", variant="primary")
- edit_button = gr.Button("Edit Behavior")
- delete_button = gr.Button("Remove Behavior")
-
- with gr.Row():
- name_input = gr.Textbox(label="Behavior Name",
- placeholder="(e.g., IBR)",
- info="The name you give to the specific behavior you're tracking or analyzing.")
- timestamps_input = gr.Textbox(label="Timestamps MM:SS",
- placeholder="(e.g., (01:15,01:35) )",
- info="The exact times during a session when you saw the behavior. The first two digits represent minutes and the last two digits represent seconds.")
- definition_input = gr.Textbox(lines=3,
- label="Behavior Definition",
- placeholder="(e.g., the child's skill in using behavior(s) to elicit aid in obtaining an object, or object related event)",
- info="Provide a clear definition of the behavior.")
-
- with gr.Row():
- submit_button = gr.Button("Save Behavior", variant="primary")
-
- submit_button.click(fn=add_or_update_behavior,
- inputs=[name_input, definition_input, timestamps_input, behavior_dropdown],
- outputs=[behavior_dropdown, behaivor_bank, name_input, definition_input, timestamps_input])
-
- add_toBank_button.click(fn=add_to_behaivor_bank,
- inputs=[behavior_dropdown, behaivor_bank],
- outputs=[behaivor_bank, behavior_dropdown])
-
- delete_button.click(fn=delete_behavior,
- inputs=[behavior_dropdown, behaivor_bank],
- outputs=[behavior_dropdown, behaivor_bank])
-
- edit_button.click(fn=edit_behavior,
- inputs=[behavior_dropdown],
- outputs=[name_input, definition_input, timestamps_input])
-
- # Function to open the sidebar
- open_sidebar_btn.click(lambda: {
- open_sidebar_btn: gr.Button(visible=False),
- close_sidebar_btn: gr.Button(visible=True),
- sidebar: gr.Column(visible=True)
- }, outputs=[open_sidebar_btn, close_sidebar_btn, sidebar])
-
- # Function to close the sidebar
- close_sidebar_btn.click(lambda: {
- open_sidebar_btn: gr.Button(visible=True),
- close_sidebar_btn: gr.Button(visible=False),
- sidebar: gr.Column(visible=False)
- }, outputs=[open_sidebar_btn, close_sidebar_btn, sidebar])
-
- # Launch the demo
+ import gradio as gr
+ import plotly.graph_objs as go
+ import numpy as np
+ import time
+ from openai import OpenAI
+ import os
+ from hardCodedData import *
+ from Helper import *
+ import cv2
+ from moviepy.editor import VideoFileClip
+ import time
+ import base64
+ import whisperx
+ import gc
+ from moviepy.editor import VideoFileClip
+ from dotenv import load_dotenv
+
+ load_dotenv()
+
+ '''
+ Model Information
+ GPT4o
+ '''
+
+ import openai
+ api_key = os.getenv("OPENAI_API_KEY")
+ client = openai.OpenAI(
+ api_key=api_key,
+ base_url="https://openai.gateway.salt-lab.org/v1",
+ )
+ MODEL="gpt-4o"
+
+ # Whisperx config
+ device = "cpu"
+ batch_size = 16 # reduce if low on GPU mem
+ compute_type = "int8" # change to "int8" if low on GPU mem (may reduce accuracy)
+ max_new_tokens = 512 # Example value, adjust as needed
+ clip_timestamps = True # Example value, adjust as needed
+ hallucination_silence_threshold = 0.5 # Example value, adjust as needed
+
+ # Load the model with the required arguments
+ default_asr_options = {
+ "max_new_tokens": max_new_tokens,
+ "clip_timestamps": clip_timestamps,
+ "hallucination_silence_threshold": hallucination_silence_threshold
+ }
+
+ model = whisperx.load_model("large-v2", device, compute_type=compute_type, **default_asr_options)
+ '''
+ Video
+ '''
+ video_file = None
+ audio_path=None
+ base64Frames = []
+ transcript=""
+
+ def process_video(video_path, seconds_per_frame=2):
+ global base64Frames, audio_path
+ base_video_path, _ = os.path.splitext(video_path)
+
+ video = cv2.VideoCapture(video_path)
+ total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
+ fps = video.get(cv2.CAP_PROP_FPS)
+ frames_to_skip = int(fps * seconds_per_frame)
+ curr_frame=0
+
+ while curr_frame < total_frames - 1:
+ video.set(cv2.CAP_PROP_POS_FRAMES, curr_frame)
+ success, frame = video.read()
+ if not success:
+ break
+ _, buffer = cv2.imencode(".jpg", frame)
+ base64Frames.append(base64.b64encode(buffer).decode("utf-8"))
+ curr_frame += frames_to_skip
+ video.release()
+
+ audio_path = "./TEST.mp3"
+ clip = VideoFileClip(video_path)
+ clip.audio.write_audiofile(audio_path, bitrate="32k")
+ clip.audio.close()
+ clip.close()
+ # transcribe_video(audio_path)
+ print(f"Extracted {len(base64Frames)} frames")
+ print(f"Extracted audio to {audio_path}")
+ return base64Frames, audio_path
+
+ chat_history = []
+ # chat_history.append({
+ # "role": "system",
+ # "content": (
+ # """
+ # You are an assistant chatbot for a Speech Language Pathologist (SLP).
+ # Your task is to help analyze a provided video of a therapy session and answer questions accurately.
+ # Provide timestamps for specific events or behaviors mentioned. Conclude each response with possible follow-up questions.
+
+ # Follow these steps:
+
+ # 1. Suggest to the user to ask, “To get started, you can try asking me how many people there are in the video.”
+ # 2. Detect how many people are in the video.
+ # 2. Suggest to the user to tell you the names of the people in the video, starting from left to right.
+ # 3. After receiving the names, respond with, “Ok thank you! Now you can ask me any questions about this video.”
+ # 4. If the user asks about a behavior, respond with, “My understanding of this behavior is [xxx - AI generated output]. Is this a behavior that you want to track? If it is, please define this behavior and tell me more about it so I can analyze it more accurately according to your practice.”
+ # 5. If you receive names, confirm that these are the names of the people from left to right.
+ # """
+ # )
+ # })
+
+ def transcribe_video(filename):
+ global transcript
+ if not audio_path:
+ raise ValueError("Audio path is None")
+ print(audio_path)
+ audio = whisperx.load_audio(audio_path)
+ result = model.transcribe(audio, batch_size=batch_size)
+
+ model_a, metadata = whisperx.load_align_model(language_code=result["language"], device=device)
+ result = whisperx.align(result["segments"], model_a, metadata, audio, device, return_char_alignments=False)
+
+
+ hf_auth_token = os.getenv("HF_AUTH_TOKEN")
+ diarize_model = whisperx.DiarizationPipeline(use_auth_token=hf_auth_token, device=device)
+
+ diarize_segments = diarize_model(audio)
+
+ dia_result = whisperx.assign_word_speakers(diarize_segments, result)
+
+ for res in dia_result["segments"]:
+ # transcript += "Speaker: " + str(res.get("speaker", None)) + "\n"
+ transcript += "Dialogue: " + str(res["text"].lstrip()) + "\n"
+ transcript += "start: " + str(int(res["start"])) + "\n"
+ transcript += "end: " + str(int(res["end"])) + "\n"
+ transcript += "\n"
+
+ return transcript
+
+
+ def handle_video(video=None):
+ global video_file, base64Frames, audio_path, chat_history, transcript
+
+ if video is None:
+ # Load example video
+ video = "./TEST.mp4"
+
+ base64Frames, audio_path = process_video(video_path=video, seconds_per_frame=100)
+ chat_history.append({
+ "role": "user",
+ "content": [
+ {"type": "text", "text": "These are the frames from the video."},
+ *map(lambda x: {"type": "image_url", "image_url": {"url": f'data:image/jpg;base64,{x}', "detail": "low"}}, base64Frames)
+ ]
+ })
+
+ if transcript:
+ chat_history[-1]['content'].append({
+ "type": "text",
+ "text": f"Also, below is the template of transcript from the video:\n"
+ "Speaker: <the speaker of the dialogue>\n"
+ "Dialogue: <the text of the dialogue>\n"
+ "start: <the starting timestamp of the dialogue in the video in second>\n"
+ "end: <the ending timestamp of the dialogue in the video in second>\n"
+ f"Transcription: {transcript}"
+ })
+
+ video_file = video
+ return video_file
+
+ '''
+ Chatbot
+ '''
+
+ def new_prompt(prompt):
+ global chat_history, video_file
+ chat_history.append({"role": "user","content": prompt,})
+ MODEL="gpt-4o"
+ # print(chat_history)
+ print(transcript)
+ try:
+ if video_file:
+ # Video exists and is processed
+ response = client.chat.completions.create(model=MODEL,messages=chat_history,temperature=0,)
+ else:
+ # No video uploaded yet
+ response = client.chat.completions.create(model=MODEL,messages=chat_history,temperature=0,)
+
+ # Extract the text content from the response and append it to the chat history
+ assistant_message = response.choices[0].message.content
+ chat_history.append({'role': 'model', 'content': assistant_message})
+ print(assistant_message)
+ except Exception as e:
+ print("Error: ",e)
+ assistant_message = "API rate limit has been reached. Please wait a moment and try again."
+ chat_history.append({'role': 'model', 'content': assistant_message})
+
+ # except google.api_core.exceptions.ResourceExhausted:
+ # assistant_message = "API rate limit has been reached. Please wait a moment and try again."
+ # chat_history.append({'role': 'model', 'parts': [assistant_message]})
+ # except Exception as e:
+ # assistant_message = f"An error occurred: {str(e)}"
+ # chat_history.append({'role': 'model', 'parts': [assistant_message]})
+
+ return chat_history
+
+ def user_input(user_message, history):
+ return "", history + [[user_message, None]]
+
+ def bot_response(history):
+ user_message = history[-1][0]
+ updated_history = new_prompt(user_message)
+ assistant_message = updated_history[-1]['content']
+ history[-1][1] = assistant_message
+ yield history
+
+
+ '''
+ Behaivor box
+ '''
+ initial_behaviors = [
+ ("Initiating Behavioral Request (IBR)",
+ ("The child's skill in using behavior(s) to elicit aid in obtaining an object, or object related event",
+ ["00:10", "00:45", "01:30"])),
+
+ ("Initiating Joint Attention (IJA)",
+ ("The child's skill in using behavior(s) to initiate shared attention to objects or events.",
+ ["00:15", "00:50", "01:40"])),
+
+ ("Responding to Joint Attention (RJA)",
+ ("The child's skill in following the examiner’s line of regard and pointing gestures.",
+ ["00:20", "01:00", "02:00"])),
+
+ ("Initiating Social Interaction (ISI)",
+ ("The child's skill at initiating turn-taking sequences and the tendency to tease the tester",
+ ["00:20", "00:50", "02:00"])),
+
+ ("Responding to Social Interaction (RSI)",
+ ("The child’s skill in responding to turn-taking interactions initiated by the examiner.",
+ ["00:20", "01:00", "02:00"]))
+ ]
+
+ behaviors = initial_behaviors
+ behavior_bank = []
+
+ def add_or_update_behavior(name, definition, timestamps, selected_behavior):
+ global behaviors, behavior_bank
+ if selected_behavior: # Update existing behavior
+ for i, (old_name, _) in enumerate(behaviors):
+ if old_name == selected_behavior:
+ behaviors[i] = (name, (definition, timestamps))
+ break
+ # Update behavior in the bank if it exists
+ behavior_bank = [name if b == selected_behavior else b for b in behavior_bank]
+ else: # Add new behavior
+ new_behavior = (name, (definition, timestamps))
+ behaviors.append(new_behavior)
+ choices = [b[0] for b in behaviors]
+ return gr.Dropdown(choices=choices, value=None, interactive=True), gr.CheckboxGroup(choices=behavior_bank, value=behavior_bank, interactive=True), "", "", ""
+
+ def add_to_behaivor_bank(selected_behavior, checkbox_group_values):
+ global behavior_bank
+ if selected_behavior and selected_behavior not in checkbox_group_values:
+ checkbox_group_values.append(selected_behavior)
+ behavior_bank = checkbox_group_values
+ return gr.CheckboxGroup(choices=checkbox_group_values, value=checkbox_group_values, interactive=True), gr.Dropdown(value=None,interactive=True)
+
+ def delete_behavior(selected_behavior, checkbox_group_values):
+ global behaviors, behavior_bank
+ behaviors = [b for b in behaviors if b[0] != selected_behavior]
+ behavior_bank = [b for b in behavior_bank if b != selected_behavior]
+ updated_choices = [b[0] for b in behaviors]
+ updated_checkbox_group = [cb for cb in checkbox_group_values if cb != selected_behavior]
+ return gr.Dropdown(choices=updated_choices, value=None, interactive=True), gr.CheckboxGroup(choices=updated_checkbox_group, value=updated_checkbox_group, interactive=True)
+
+ def edit_behavior(selected_behavior):
+ for name, (definition, timestamps) in behaviors:
+ if name == selected_behavior:
+ # Return values to populate textboxes
+ return name, definition, timestamps
+ return "", "", ""
+
+
+ welcome_message = """
+ Hello! I'm your AI assistant.
+ I can help you analyze your video sessions following your instructions.
+ To get started, please upload a video or add your behaviors to the Behavior Bank using the Behavior Manager.
+ """
+ #If you want to tell me about the people in the video, please name them starting from left to right.
+
+ css="""
+ body {
+ background-color: #edf1fa; /* offwhite */
+ }
+ .gradio-container {
+ background-color: #edf1fa; /* offwhite */
+ }
+ .column-form .wrap {
+ flex-direction: column;
+ }
+ .sidebar {
+ background: #ffffff;
+ padding: 10px;
+ border-right: 1px solid #dee2e6;
+ }
+ .content {
+ padding: 10px;
+ }
+ """
+
+ '''
+ Gradio Demo
+ '''
+ with gr.Blocks(theme='base', css=css, title="Soap.AI") as demo:
+ gr.Markdown("# 🤖 AI-Supported SOAP Generation")
+
+ with gr.Row():
+ with gr.Column():
+ video = gr.Video(label="Video", visible=True, height=360, container=True)
+ with gr.Row():
+ with gr.Column(min_width=1, scale=1):
+ video_upload_button = gr.Button("Analyze Video", variant="primary")
+ with gr.Column(min_width=1, scale=1):
+ example_video_button = gr.Button("Load Example Video")
+
+ video_upload_button.click(handle_video, inputs=video, outputs=video)
+ example_video_button.click(handle_video, None, outputs=video)
+
+ with gr.Column():
+ chat_section = gr.Group(visible=True)
+ with chat_section:
+ chatbot = gr.Chatbot(elem_id="chatbot",
+ container=True,
+ likeable=True,
+ value=[[None, welcome_message]],
+ avatar_images=(None, "./avatar.webp"))
+ with gr.Row():
+ txt = gr.Textbox(show_label=False, placeholder="Type here!")
+ with gr.Row():
+ send_btn = gr.Button("Send Message", elem_id="send-btn", variant="primary")
+ clear_btn = gr.Button("Clear Chat", elem_id="clear-btn")
+
+ with gr.Row():
+ behaivor_bank = gr.CheckboxGroup(label="Behavior Bank",
+ choices=[],
+ interactive=True,
+ info="A space to store all the behaviors you want to analyze.")
+ open_sidebar_btn = gr.Button("Show Behavior Manager", scale=0)
+ close_sidebar_btn = gr.Button("Hide Behavior Manager", visible=False, scale=0)
+
+ txt.submit(user_input, [txt, chatbot], [txt, chatbot], queue=False).then(
+ bot_response, chatbot, chatbot)
+ send_btn.click(user_input, [txt, chatbot], [txt, chatbot], queue=False).then(
+ bot_response, chatbot, chatbot)
+ clear_btn.click(lambda: None, None, chatbot, queue=False)
+
+ # Define a sidebar column that is initially hidden
+ with gr.Column(visible=False, min_width=200, scale=0.5, elem_classes="sidebar") as sidebar:
+ behavior_dropdown = gr.Dropdown(label="Behavior Collection",
+ choices=behaviors,
+ interactive=True,
+ container=True,
+ elem_classes="column-form",
+ info="Choose a behavior to add to the bank, edit or remove.")
+ with gr.Row():
+ add_toBank_button = gr.Button("Add Behavior to Bank", variant="primary")
+ edit_button = gr.Button("Edit Behavior")
+ delete_button = gr.Button("Remove Behavior")
+
+ with gr.Row():
+ name_input = gr.Textbox(label="Behavior Name",
+ placeholder="(e.g., IBR)",
+ info="The name you give to the specific behavior you're tracking or analyzing.")
+ timestamps_input = gr.Textbox(label="Timestamps MM:SS",
+ placeholder="(e.g., (01:15,01:35) )",
+ info="The exact times during a session when you saw the behavior. The first two digits represent minutes and the last two digits represent seconds.")
+ definition_input = gr.Textbox(lines=3,
+ label="Behavior Definition",
+ placeholder="(e.g., the child's skill in using behavior(s) to elicit aid in obtaining an object, or object related event)",
+ info="Provide a clear definition of the behavior.")
+
+ with gr.Row():
+ submit_button = gr.Button("Save Behavior", variant="primary")
+
+ submit_button.click(fn=add_or_update_behavior,
+ inputs=[name_input, definition_input, timestamps_input, behavior_dropdown],
+ outputs=[behavior_dropdown, behaivor_bank, name_input, definition_input, timestamps_input])
+
+ add_toBank_button.click(fn=add_to_behaivor_bank,
+ inputs=[behavior_dropdown, behaivor_bank],
+ outputs=[behaivor_bank, behavior_dropdown])
+
+ delete_button.click(fn=delete_behavior,
+ inputs=[behavior_dropdown, behaivor_bank],
+ outputs=[behavior_dropdown, behaivor_bank])
+
+ edit_button.click(fn=edit_behavior,
+ inputs=[behavior_dropdown],
+ outputs=[name_input, definition_input, timestamps_input])
+
+ # Function to open the sidebar
+ open_sidebar_btn.click(lambda: {
+ open_sidebar_btn: gr.Button(visible=False),
+ close_sidebar_btn: gr.Button(visible=True),
+ sidebar: gr.Column(visible=True)
+ }, outputs=[open_sidebar_btn, close_sidebar_btn, sidebar])
+
+ # Function to close the sidebar
+ close_sidebar_btn.click(lambda: {
+ open_sidebar_btn: gr.Button(visible=True),
+ close_sidebar_btn: gr.Button(visible=False),
+ sidebar: gr.Column(visible=False)
+ }, outputs=[open_sidebar_btn, close_sidebar_btn, sidebar])
+
+ # Launch the demo
  demo.launch(share=True)