adiv07 committed on
Commit 7d82b61 · verified · 1 Parent(s): 0d70e48

Upload 12 files

Files changed (12)
  1. .gitattributes +36 -35
  2. .gitignore +1 -0
  3. Gpt4oDemo.py +402 -0
  4. Helper.py +79 -0
  5. README.md +8 -13
  6. TEST.mp3 +0 -0
  7. TEST.mp4 +3 -0
  8. avatar.webp +0 -0
  9. hardCodedData.py +98 -0
  10. newDemo.py +311 -0
  11. requirements.txt +166 -1
  12. style.css +50 -0
.gitattributes CHANGED
@@ -1,35 +1,36 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ TEST.mp4 filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1 @@
1
+ .env
Gpt4oDemo.py ADDED
@@ -0,0 +1,402 @@
1
+ import gradio as gr
2
+ import plotly.graph_objs as go
3
+ import numpy as np
4
+ import time
5
+ from openai import OpenAI
6
+ import os
7
+ from hardCodedData import *
8
+ from Helper import *
9
+ import cv2
10
+ from moviepy.editor import VideoFileClip
11
+ import time
12
+ import base64
13
+ import whisperx
14
+ import gc
15
+ from moviepy.editor import VideoFileClip
16
+ from dotenv import load_dotenv
17
+
18
+ load_dotenv()
19
+
20
+ '''
21
+ Model Information
22
+ GPT4o
23
+ '''
24
+
25
+ import openai
26
+ api_key = os.getenv("OPENAI_API_KEY")
27
+ client = openai.OpenAI(
28
+ api_key=api_key,
29
+ base_url="https://openai.gateway.salt-lab.org/v1",
30
+ )
31
+ MODEL="gpt-4o"
32
+
33
+ # Whisperx config
34
+ device = "cpu"
35
+ batch_size = 16 # reduce if low on GPU mem
36
+ compute_type = "int8" # change to "int8" if low on GPU mem (may reduce accuracy)
37
+ model = whisperx.load_model("large-v2", device, compute_type=compute_type)
38
+
39
+ '''
40
+ Video
41
+ '''
42
+ video_file = None
43
+ audio_path=None
44
+ base64Frames = []
45
+ transcript=""
46
+
47
+ def process_video(video_path, seconds_per_frame=2):
48
+ global base64Frames, audio_path
49
+ base_video_path, _ = os.path.splitext(video_path)
50
+
51
+ video = cv2.VideoCapture(video_path)
52
+ total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
53
+ fps = video.get(cv2.CAP_PROP_FPS)
54
+ frames_to_skip = int(fps * seconds_per_frame)
55
+ curr_frame=0
56
+
57
+ while curr_frame < total_frames - 1:
58
+ video.set(cv2.CAP_PROP_POS_FRAMES, curr_frame)
59
+ success, frame = video.read()
60
+ if not success:
61
+ break
62
+ _, buffer = cv2.imencode(".jpg", frame)
63
+ base64Frames.append(base64.b64encode(buffer).decode("utf-8"))
64
+ curr_frame += frames_to_skip
65
+ video.release()
66
+
67
+ audio_path = "./TEST.mp3"
68
+ clip = VideoFileClip(video_path)
69
+ clip.audio.write_audiofile(audio_path, bitrate="32k")
70
+ clip.audio.close()
71
+ clip.close()
72
+ # transcribe_video(audio_path)
73
+ print(f"Extracted {len(base64Frames)} frames")
74
+ print(f"Extracted audio to {audio_path}")
75
+ return base64Frames, audio_path
76
+
77
+ chat_history = []
78
+ # chat_history.append({
79
+ # "role": "system",
80
+ # "content": (
81
+ # """
82
+ # You are an assistant chatbot for a Speech Language Pathologist (SLP).
83
+ # Your task is to help analyze a provided video of a therapy session and answer questions accurately.
84
+ # Provide timestamps for specific events or behaviors mentioned. Conclude each response with possible follow-up questions.
85
+
86
+ # Follow these steps:
87
+
88
+ # 1. Suggest to the user to ask, “To get started, you can try asking me how many people there are in the video.”
89
+ # 2. Detect how many people are in the video.
90
+ # 2. Suggest to the user to tell you the names of the people in the video, starting from left to right.
91
+ # 3. After receiving the names, respond with, “Ok thank you! Now you can ask me any questions about this video.”
92
+ # 4. If the user asks about a behavior, respond with, “My understanding of this behavior is [xxx - AI generated output]. Is this a behavior that you want to track? If it is, please define this behavior and tell me more about it so I can analyze it more accurately according to your practice.”
93
+ # 5. If you receive names, confirm that these are the names of the people from left to right.
94
+ # """
95
+ # )
96
+ # })
97
+
98
+ def transcribe_video(filename):
99
+ global transcript
100
+ if not audio_path:
101
+ raise ValueError("Audio path is None")
102
+ print(audio_path)
103
+ audio = whisperx.load_audio(audio_path)
104
+ result = model.transcribe(audio, batch_size=batch_size)
105
+
106
+ model_a, metadata = whisperx.load_align_model(language_code=result["language"], device=device)
107
+ result = whisperx.align(result["segments"], model_a, metadata, audio, device, return_char_alignments=False)
108
+
109
+
110
+ hf_auth_token = os.getenv("HF_AUTH_TOKEN")
111
+ diarize_model = whisperx.DiarizationPipeline(use_auth_token=hf_auth_token, device=device)
112
+
113
+ diarize_segments = diarize_model(audio)
114
+
115
+ dia_result = whisperx.assign_word_speakers(diarize_segments, result)
116
+
117
+ for res in dia_result["segments"]:
118
+ # transcript += "Speaker: " + str(res.get("speaker", None)) + "\n"
119
+ transcript += "Dialogue: " + str(res["text"].lstrip()) + "\n"
120
+ transcript += "start: " + str(int(res["start"])) + "\n"
121
+ transcript += "end: " + str(int(res["end"])) + "\n"
122
+ transcript += "\n"
123
+
124
+ return transcript
125
+
126
+
127
+ def handle_video(video=None):
128
+ global video_file, base64Frames, audio_path, chat_history, transcript
129
+
130
+ if video is None:
131
+ # Load example video
132
+ video = "./TEST.mp4"
133
+
134
+ base64Frames, audio_path = process_video(video_path=video, seconds_per_frame=100)
135
+ chat_history.append({
136
+ "role": "user",
137
+ "content": [
138
+ {"type": "text", "text": "These are the frames from the video."},
139
+ *map(lambda x: {"type": "image_url", "image_url": {"url": f'data:image/jpg;base64,{x}', "detail": "low"}}, base64Frames)
140
+ ]
141
+ })
142
+
143
+ if transcript:
144
+ chat_history[-1]['content'].append({
145
+ "type": "text",
146
+ "text": f"Also, below is the template of transcript from the video:\n"
147
+ "Speaker: <the speaker of the dialogue>\n"
148
+ "Dialogue: <the text of the dialogue>\n"
149
+ "start: <the starting timestamp of the dialogue in the video in second>\n"
150
+ "end: <the ending timestamp of the dialogue in the video in second>\n"
151
+ f"Transcription: {transcript}"
152
+ })
153
+
154
+ video_file = video
155
+ return video_file
156
+
157
+ '''
158
+ Chatbot
159
+ '''
160
+
161
+ def new_prompt(prompt):
162
+ global chat_history, video_file
163
+ chat_history.append({"role": "user","content": prompt,})
164
+ MODEL="gpt-4o"
165
+ # print(chat_history)
166
+ print(transcript)
167
+ try:
168
+ if video_file:
169
+ # Video exists and is processed
170
+ response = client.chat.completions.create(model=MODEL,messages=chat_history,temperature=0,)
171
+ else:
172
+ # No video uploaded yet
173
+ response = client.chat.completions.create(model=MODEL,messages=chat_history,temperature=0,)
174
+
175
+ # Extract the text content from the response and append it to the chat history
176
+ assistant_message = response.choices[0].message.content
177
+ chat_history.append({'role': 'model', 'content': assistant_message})
178
+ print(assistant_message)
179
+ except Exception as e:
180
+ print("Error: ",e)
181
+ assistant_message = "API rate limit has been reached. Please wait a moment and try again."
182
+ chat_history.append({'role': 'model', 'content': assistant_message})
183
+
184
+ # except google.api_core.exceptions.ResourceExhausted:
185
+ # assistant_message = "API rate limit has been reached. Please wait a moment and try again."
186
+ # chat_history.append({'role': 'model', 'parts': [assistant_message]})
187
+ # except Exception as e:
188
+ # assistant_message = f"An error occurred: {str(e)}"
189
+ # chat_history.append({'role': 'model', 'parts': [assistant_message]})
190
+
191
+ return chat_history
192
+
193
+ def user_input(user_message, history):
194
+ return "", history + [[user_message, None]]
195
+
196
+ def bot_response(history):
197
+ user_message = history[-1][0]
198
+ updated_history = new_prompt(user_message)
199
+ assistant_message = updated_history[-1]['content']
200
+ history[-1][1] = assistant_message
201
+ yield history
202
+
203
+
204
+ '''
205
+ Behavior box
206
+ '''
207
+ initial_behaviors = [
208
+ ("Initiating Behavioral Request (IBR)",
209
+ ("The child's skill in using behavior(s) to elicit aid in obtaining an object, or object related event",
210
+ ["00:10", "00:45", "01:30"])),
211
+
212
+ ("Initiating Joint Attention (IJA)",
213
+ ("The child's skill in using behavior(s) to initiate shared attention to objects or events.",
214
+ ["00:15", "00:50", "01:40"])),
215
+
216
+ ("Responding to Joint Attention (RJA)",
217
+ ("The child's skill in following the examiner’s line of regard and pointing gestures.",
218
+ ["00:20", "01:00", "02:00"])),
219
+
220
+ ("Initiating Social Interaction (ISI)",
221
+ ("The child's skill at initiating turn-taking sequences and the tendency to tease the tester",
222
+ ["00:20", "00:50", "02:00"])),
223
+
224
+ ("Responding to Social Interaction (RSI)",
225
+ ("The child’s skill in responding to turn-taking interactions initiated by the examiner.",
226
+ ["00:20", "01:00", "02:00"]))
227
+ ]
228
+
229
+ behaviors = initial_behaviors
230
+ behavior_bank = []
231
+
232
+ def add_or_update_behavior(name, definition, timestamps, selected_behavior):
233
+ global behaviors, behavior_bank
234
+ if selected_behavior: # Update existing behavior
235
+ for i, (old_name, _) in enumerate(behaviors):
236
+ if old_name == selected_behavior:
237
+ behaviors[i] = (name, (definition, timestamps))
238
+ break
239
+ # Update behavior in the bank if it exists
240
+ behavior_bank = [name if b == selected_behavior else b for b in behavior_bank]
241
+ else: # Add new behavior
242
+ new_behavior = (name, (definition, timestamps))
243
+ behaviors.append(new_behavior)
244
+ choices = [b[0] for b in behaviors]
245
+ return gr.Dropdown(choices=choices, value=None, interactive=True), gr.CheckboxGroup(choices=behavior_bank, value=behavior_bank, interactive=True), "", "", ""
246
+
247
+ def add_to_behaivor_bank(selected_behavior, checkbox_group_values):
248
+ global behavior_bank
249
+ if selected_behavior and selected_behavior not in checkbox_group_values:
250
+ checkbox_group_values.append(selected_behavior)
251
+ behavior_bank = checkbox_group_values
252
+ return gr.CheckboxGroup(choices=checkbox_group_values, value=checkbox_group_values, interactive=True), gr.Dropdown(value=None,interactive=True)
253
+
254
+ def delete_behavior(selected_behavior, checkbox_group_values):
255
+ global behaviors, behavior_bank
256
+ behaviors = [b for b in behaviors if b[0] != selected_behavior]
257
+ behavior_bank = [b for b in behavior_bank if b != selected_behavior]
258
+ updated_choices = [b[0] for b in behaviors]
259
+ updated_checkbox_group = [cb for cb in checkbox_group_values if cb != selected_behavior]
260
+ return gr.Dropdown(choices=updated_choices, value=None, interactive=True), gr.CheckboxGroup(choices=updated_checkbox_group, value=updated_checkbox_group, interactive=True)
261
+
262
+ def edit_behavior(selected_behavior):
263
+ for name, (definition, timestamps) in behaviors:
264
+ if name == selected_behavior:
265
+ # Return values to populate textboxes
266
+ return name, definition, timestamps
267
+ return "", "", ""
268
+
269
+
270
+ welcome_message = """
271
+ Hello! I'm your AI assistant.
272
+ I can help you analyze your video sessions following your instructions.
273
+ To get started, please upload a video or add your behaviors to the Behavior Bank using the Behavior Manager.
274
+ """
275
+ #If you want to tell me about the people in the video, please name them starting from left to right.
276
+
277
+ css="""
278
+ body {
279
+ background-color: #edf1fa; /* offwhite */
280
+ }
281
+ .gradio-container {
282
+ background-color: #edf1fa; /* offwhite */
283
+ }
284
+ .column-form .wrap {
285
+ flex-direction: column;
286
+ }
287
+ .sidebar {
288
+ background: #ffffff;
289
+ padding: 10px;
290
+ border-right: 1px solid #dee2e6;
291
+ }
292
+ .content {
293
+ padding: 10px;
294
+ }
295
+ """
296
+
297
+ '''
298
+ Gradio Demo
299
+ '''
300
+ with gr.Blocks(theme='base', css=css, title="Soap.AI") as demo:
301
+ gr.Markdown("# 🤖 AI-Supported SOAP Generation")
302
+
303
+ with gr.Row():
304
+ with gr.Column():
305
+ video = gr.Video(label="Video", visible=True, height=360, container=True)
306
+ with gr.Row():
307
+ with gr.Column(min_width=1, scale=1):
308
+ video_upload_button = gr.Button("Analyze Video", variant="primary")
309
+ with gr.Column(min_width=1, scale=1):
310
+ example_video_button = gr.Button("Load Example Video")
311
+
312
+ video_upload_button.click(handle_video, inputs=video, outputs=video)
313
+ example_video_button.click(handle_video, None, outputs=video)
314
+
315
+ with gr.Column():
316
+ chat_section = gr.Group(visible=True)
317
+ with chat_section:
318
+ chatbot = gr.Chatbot(elem_id="chatbot",
319
+ container=True,
320
+ likeable=True,
321
+ value=[[None, welcome_message]],
322
+ avatar_images=(None, "./avatar.webp"))
323
+ with gr.Row():
324
+ txt = gr.Textbox(show_label=False, placeholder="Type here!")
325
+ with gr.Row():
326
+ send_btn = gr.Button("Send Message", elem_id="send-btn", variant="primary")
327
+ clear_btn = gr.Button("Clear Chat", elem_id="clear-btn")
328
+
329
+ with gr.Row():
330
+ behaivor_bank = gr.CheckboxGroup(label="Behavior Bank",
331
+ choices=[],
332
+ interactive=True,
333
+ info="A space to store all the behaviors you want to analyze.")
334
+ open_sidebar_btn = gr.Button("Show Behavior Manager", scale=0)
335
+ close_sidebar_btn = gr.Button("Hide Behavior Manager", visible=False, scale=0)
336
+
337
+ txt.submit(user_input, [txt, chatbot], [txt, chatbot], queue=False).then(
338
+ bot_response, chatbot, chatbot)
339
+ send_btn.click(user_input, [txt, chatbot], [txt, chatbot], queue=False).then(
340
+ bot_response, chatbot, chatbot)
341
+ clear_btn.click(lambda: None, None, chatbot, queue=False)
342
+
343
+ # Define a sidebar column that is initially hidden
344
+ with gr.Column(visible=False, min_width=200, scale=0.5, elem_classes="sidebar") as sidebar:
345
+ behavior_dropdown = gr.Dropdown(label="Behavior Collection",
346
+ choices=behaviors,
347
+ interactive=True,
348
+ container=True,
349
+ elem_classes="column-form",
350
+ info="Choose a behavior to add to the bank, edit or remove.")
351
+ with gr.Row():
352
+ add_toBank_button = gr.Button("Add Behavior to Bank", variant="primary")
353
+ edit_button = gr.Button("Edit Behavior")
354
+ delete_button = gr.Button("Remove Behavior")
355
+
356
+ with gr.Row():
357
+ name_input = gr.Textbox(label="Behavior Name",
358
+ placeholder="(e.g., IBR)",
359
+ info="The name you give to the specific behavior you're tracking or analyzing.")
360
+ timestamps_input = gr.Textbox(label="Timestamps MM:SS",
361
+ placeholder="(e.g., (01:15,01:35) )",
362
+ info="The exact times during a session when you saw the behavior. The first two digits represent minutes and the last two digits represent seconds.")
363
+ definition_input = gr.Textbox(lines=3,
364
+ label="Behavior Definition",
365
+ placeholder="(e.g., the child's skill in using behavior(s) to elicit aid in obtaining an object, or object related event)",
366
+ info="Provide a clear definition of the behavior.")
367
+
368
+ with gr.Row():
369
+ submit_button = gr.Button("Save Behavior", variant="primary")
370
+
371
+ submit_button.click(fn=add_or_update_behavior,
372
+ inputs=[name_input, definition_input, timestamps_input, behavior_dropdown],
373
+ outputs=[behavior_dropdown, behaivor_bank, name_input, definition_input, timestamps_input])
374
+
375
+ add_toBank_button.click(fn=add_to_behaivor_bank,
376
+ inputs=[behavior_dropdown, behaivor_bank],
377
+ outputs=[behaivor_bank, behavior_dropdown])
378
+
379
+ delete_button.click(fn=delete_behavior,
380
+ inputs=[behavior_dropdown, behaivor_bank],
381
+ outputs=[behavior_dropdown, behaivor_bank])
382
+
383
+ edit_button.click(fn=edit_behavior,
384
+ inputs=[behavior_dropdown],
385
+ outputs=[name_input, definition_input, timestamps_input])
386
+
387
+ # Function to open the sidebar
388
+ open_sidebar_btn.click(lambda: {
389
+ open_sidebar_btn: gr.Button(visible=False),
390
+ close_sidebar_btn: gr.Button(visible=True),
391
+ sidebar: gr.Column(visible=True)
392
+ }, outputs=[open_sidebar_btn, close_sidebar_btn, sidebar])
393
+
394
+ # Function to close the sidebar
395
+ close_sidebar_btn.click(lambda: {
396
+ open_sidebar_btn: gr.Button(visible=True),
397
+ close_sidebar_btn: gr.Button(visible=False),
398
+ sidebar: gr.Column(visible=False)
399
+ }, outputs=[open_sidebar_btn, close_sidebar_btn, sidebar])
400
+
401
+ # Launch the demo
402
+ demo.launch(share=True)
Helper.py ADDED
@@ -0,0 +1,79 @@
1
+ import re
2
+
3
+ # table
4
+ css = """
5
+ .scrollable-table {
6
+ max-height: 300px;
7
+ overflow-y: auto;
8
+ border: 1px solid #ccc;
9
+ padding: 10px;
10
+
11
+ }
12
+ .scrollable-table table {
13
+ width: 100%;
14
+ border-collapse: collapse;
15
+ background-color: #FFFFFF;
16
+
17
+ }
18
+ .scrollable-table th, .scrollable-table td {
19
+ border: 1px solid #ddd;
20
+ padding: 8px;
21
+ text-align: left;
22
+ color: #000000
23
+
24
+ }
25
+ .scrollable-table th {
26
+ background-color: #EAF2FF;
27
+ color: #3366CC;
28
+ }
29
+ """
30
+
31
+ def parse_transcript(transcript):
32
+ # Regex to match the timestamps and the text
33
+ pattern = re.compile(r'(\d{2}:\d{2})\s+(.+?)(?=\d{2}:\d{2}|$)', re.DOTALL)
34
+ matches = pattern.findall(transcript)
35
+
36
+ timestamps = []
37
+ texts = []
38
+
39
+ for match in matches:
40
+ timestamps.append(match[0])
41
+ texts.append(match[1].strip())
42
+
43
+ return timestamps, texts
44
+
45
+ def create_transcript_table(timestamps, transcript_text):
46
+ table_html = '<div class="scrollable-table">\n'
47
+ table_html += '<table>\n'
48
+ table_html += ' <thead>\n'
49
+ table_html += ' <tr>\n'
50
+ table_html += ' <th>Timestamp</th>\n'
51
+ table_html += ' <th>Transcript</th>\n'
52
+ table_html += ' </tr>\n'
53
+ table_html += ' </thead>\n'
54
+ table_html += ' <tbody>\n'
55
+ for ts, text in zip(timestamps, transcript_text):
56
+ table_html += ' <tr>\n'
57
+ table_html += f' <td>{ts}</td>\n'
58
+ table_html += f' <td>{text}</td>\n'
59
+ table_html += ' </tr>\n'
60
+ table_html += ' </tbody>\n'
61
+ table_html += '</table>\n'
62
+ return table_html
63
+
64
+ def filter_transcript():
65
+ timestamps = [
66
+ "15.0 - 17.0",
67
+ "38.08 - 39.50"
68
+ ]
69
+ transcript_text = [
70
+ "Sad (prompt; 1st)",
71
+ "Because he fell (no prompt; 2nd)"
72
+ ]
73
+ return timestamps, transcript_text
74
+
75
+ # Guidance Generation Function
76
+ def generate_guidance():
77
+ guidance_text = """ Engagement: Student may display behaviors such as rocking when showing engagement.
78
+ Impact factors: Weather (e.g., raining) can impact student’s performance."""
79
+ return guidance_text
README.md CHANGED
@@ -1,13 +1,8 @@
1
- ---
2
- title: SOAP Temp
3
- emoji: 💬
4
- colorFrom: yellow
5
- colorTo: purple
6
- sdk: gradio
7
- sdk_version: 4.36.1
8
- app_file: app.py
9
- pinned: false
10
- license: mit
11
- ---
12
-
13
- An example chatbot using [Gradio](https://gradio.app), [`huggingface_hub`](https://huggingface.co/docs/huggingface_hub/v0.22.2/en/index), and the [Hugging Face Inference API](https://huggingface.co/docs/api-inference/index).
1
+ ---
2
+ title: SOAP
3
+ app_file: newDemo.py
4
+ sdk: gradio
5
+ sdk_version: 4.31.0
6
+ ---
7
+ # SOAPdemo
8
+ SOAP demo by Qingxiao/Parisa/Aditya
 
TEST.mp3 ADDED
Binary file (494 kB).
TEST.mp4 ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6278941748127fc61cee67d4e7477b842f6890a3df50afcbece9960137352898
3
+ size 22321588
avatar.webp ADDED
hardCodedData.py ADDED
@@ -0,0 +1,98 @@
1
+ # Define chatbot and checkbox options
2
+ First_response = """
3
+ Four people identified in the video. Please select the people you want to analyse:
4
+ """
5
+ Second_response="""Let’s confirm the learning goals:"""
6
+
7
+ non_verbal_options = [
8
+ "Following directions",
9
+ "On-task percentage",
10
+ "Elopement attempts",
11
+ "Falling response",
12
+ "Overwhelm indicators",
13
+ "Avoidance behaviors",
14
+ "Harm attempts",
15
+ "Eye contact percentage",
16
+ "Eye contact duration",
17
+ "Body language",
18
+ "Stimming behaviors",
19
+ "Attention tracking",
20
+ "Whole-body listening",
21
+ "Gaze tracking",
22
+ "Inappropriate touching",
23
+ "Listening behaviors",
24
+ "Noises or gestures"
25
+ ]
26
+
27
+ verbal_options = [
28
+ "Target sounds",
29
+ "Word position",
30
+ "Sound substitutions",
31
+ "Articulation frequency",
32
+ "Mean Length of Utterance (MLU)",
33
+ "Disfluencies",
34
+ "Questions asked",
35
+ "Student answers",
36
+ "Response time",
37
+ "Peer responses",
38
+ "On-topic responses",
39
+ "Correct response ratio",
40
+ "Cues needed",
41
+ "Pragmatic skills",
42
+ "Conversation volume",
43
+ "Initiating conversation",
44
+ "Interruptions",
45
+ "Following directions",
46
+ "Answering questions",
47
+ "Idioms/sarcasm",
48
+ "Grammar and syntax",
49
+ "Vocabulary use",
50
+ "Answering wh-questions"
51
+ ]
52
+
53
+ environmental_options = [
54
+ "External noises",
55
+ "Room activity",
56
+ "Room temperature",
57
+ "Fluorescent lights",
58
+ "School vs home",
59
+ "Session location",
60
+ "Furniture size",
61
+ ]
62
+
63
+
64
+ client_options=[
65
+ "David",
66
+ "John",
67
+ "Sam",
68
+ "Mike",
69
+ ]
70
+
71
+ data_insights = """
72
+ - On 9/17, Mike seemed to be distracted by the lighting in the room, which could be the reason he had more approximate words than correct words.
73
+ - On 9/18, there was a level of outside noise present during the session due to weather. This was also where Mike’s incorrect words were at their highest.
74
+ - On 9/19, Mike performed the best he has yet!
75
+ - Between 9/20 - 9/22, Mike's approximate words have tapered out.
76
+ """
77
+
78
+
79
+ subjective_report = """
80
+ Mike exhibited signs of stress at the start of the session due to the rainy weather, but he maintained engagement and participation throughout.
81
+ """
82
+
83
+ objective_report = """
84
+ - Number of Answers Given: This child named one of the characters in the story as 'Bob.'
85
+ - Cues Required: Responded directly to the SLP’s prompt without requiring additional cues.
86
+ - Observed Behavior: Participation in the naming process indicates engagement, though the child's involvement appears less proactive compared to the middle child.
87
+ 'Wh' Questions Addressed: Answered a 'who' question by providing a name for a character.
88
+ - Grammar and Complexity: The response was simple but appropriate for the task at hand.
89
+ - Visual Cues Used: It's not clear from the transcript if the child used visual cues, but the direct response to the SLP's question suggests attentiveness to verbal cues.
90
+ """
91
+
92
+ assessment_report = """
93
+ His ability to produce /er/ words when presented with a phonemic cue has also improved from only making 20% of words in yesterday’s session.
94
+ """
95
+
96
+ plan_report = """
97
+
98
+ """
newDemo.py ADDED
@@ -0,0 +1,311 @@
1
+ import gradio as gr
2
+ import plotly.graph_objs as go
3
+ import numpy as np
4
+ import time
5
+ import google.generativeai as genai
6
+ from hardCodedData import *
7
+ from Helper import *
8
+ import google
9
+
10
+ '''
11
+ Model Information
12
+ Gemini 1.5 pro
13
+ '''
14
+ GOOGLE_API_KEY = "api"
15
+ genai.configure(api_key="AIzaSyC6msuJuuRiXTplyOzgnlZchpu5_olBXYs")
16
+ generation_config = genai.GenerationConfig(temperature=0.5)
17
+
18
+ # Model configuration
19
+ model = genai.GenerativeModel(
20
+ model_name='gemini-1.5-pro-latest',
21
+ system_instruction= """
22
+ You are an assistant chatbot for a Speech Language Pathologist (SLP).
23
+ Your task is to help analyze a provided video of a therapy session and answer questions accurately.
24
+ Provide timestamps for specific events or behaviors mentioned. Conclude each response with possible follow-up questions.
25
+
26
+ Follow these steps:
27
+
28
+ 1. Suggest to the user to ask, “To get started, you can try asking me how many people there are in the video.”
29
+ 2. Detect how many people are in the video.
30
+ 2. Suggest to the user to tell you the names of the people in the video, starting from left to right.
31
+ 3. After receiving the names, respond with, “Ok thank you! Now you can ask me any questions about this video.”
32
+ 4. If the user asks about a behavior, respond with, “My understanding of this behavior is [xxx - AI generated output]. Is this a behavior that you want to track? If it is, please define this behavior and tell me more about it so I can analyze it more accurately according to your practice.”
33
+ 5. If you receive names, confirm that these are the names of the people from left to right.
34
+ """
35
+ )
36
+
37
+
38
+
39
+ '''
40
+ Video
41
+ '''
42
+ video_file = None
43
+
44
+ def handle_video(video=None):
45
+ global video_file
46
+ if video is None:
47
+ # Load example video
48
+ video = "./TEST.mp4"
49
+ isTest = True
50
+
51
+ video_file = genai.upload_file(path=video)
52
+
53
+ while video_file.state.name == "PROCESSING":
54
+ print(".", end="")
55
+ time.sleep(10)
56
+ video_file = genai.get_file(video_file.name)
57
+
58
+ if video_file.state.name == "FAILED":
59
+ raise ValueError(video_file.state.name)
60
+
61
+ if isTest:
62
+ return video
63
+ else:
64
+ return video_file
65
+
66
+ '''
67
+ Chatbot
68
+ '''
69
+ chat_history = []
70
+ def new_prompt(prompt):
71
+ global chat_history, video_file
72
+
73
+ # Append user prompt to chat history
74
+ chat_history.append({'role': 'user', 'parts': [prompt]})
75
+ try:
76
+
77
+ if video_file:
78
+ # Video exists and is processed
79
+ chat_history[-1]['parts'].extend([" from video: ", video_file])
80
+ response = model.generate_content(chat_history, request_options={"timeout": 600})
81
+ else:
82
+ # No video uploaded yet
83
+ response = model.generate_content(chat_history)
84
+
85
+ # Extract the text content from the response and append it to the chat history
86
+ assistant_message = response.candidates[0].content.parts[0].text
87
+ chat_history.append({'role': 'model', 'parts': [assistant_message]})
88
+
89
+ except google.api_core.exceptions.ResourceExhausted:
90
+ assistant_message = "API rate limit has been reached. Please wait a moment and try again."
91
+ chat_history.append({'role': 'model', 'parts': [assistant_message]})
92
+ except Exception as e:
93
+ assistant_message = f"An error occurred: {str(e)}"
94
+ chat_history.append({'role': 'model', 'parts': [assistant_message]})
95
+
96
+ return chat_history
97
+
98
+ def user_input(user_message, history):
99
+ return "", history + [[user_message, None]]
100
+
101
+ def bot_response(history):
102
+ user_message = history[-1][0]
103
+ print(history)
104
+ updated_history = new_prompt(user_message)
105
+ print(updated_history)
106
+ assistant_message = updated_history[-1]['parts'][0]
107
+ for i in range(len(assistant_message)):
108
+ time.sleep(0.05)
109
+ history[-1][1] = assistant_message[:i+1]
110
+ yield history
111
+
112
+
113
+ '''
114
+ Behavior box
115
+ '''
116
+ initial_behaviors = [
117
+ ("Initiating Behavioral Request (IBR)",
118
+ ("The child's skill in using behavior(s) to elicit aid in obtaining an object, or object related event",
119
+ ["00:10", "00:45", "01:30"])),
120
+
121
+ ("Initiating Joint Attention (IJA)",
122
+ ("The child's skill in using behavior(s) to initiate shared attention to objects or events.",
123
+ ["00:15", "00:50", "01:40"])),
124
+
125
+ ("Responding to Joint Attention (RJA)",
126
+ ("The child's skill in following the examiner’s line of regard and pointing gestures.",
127
+ ["00:20", "01:00", "02:00"])),
128
+
129
+ ("Initiating Social Interaction (ISI)",
130
+ ("The child's skill at initiating turn-taking sequences and the tendency to tease the tester",
131
+ ["00:20", "00:50", "02:00"])),
132
+
133
+ ("Responding to Social Interaction (RSI)",
134
+ ("The child’s skill in responding to turn-taking interactions initiated by the examiner.",
135
+ ["00:20", "01:00", "02:00"]))
136
+ ]
137
+
138
+ behaviors = initial_behaviors
139
+ behavior_bank = []
140
+
141
+ def add_or_update_behavior(name, definition, timestamps, selected_behavior):
142
+ global behaviors, behavior_bank
143
+ if selected_behavior: # Update existing behavior
144
+ for i, (old_name, _) in enumerate(behaviors):
145
+ if old_name == selected_behavior:
146
+ behaviors[i] = (name, (definition, timestamps))
147
+ break
148
+ # Update behavior in the bank if it exists
149
+ behavior_bank = [name if b == selected_behavior else b for b in behavior_bank]
150
+ else: # Add new behavior
151
+ new_behavior = (name, (definition, timestamps))
152
+ behaviors.append(new_behavior)
153
+ choices = [b[0] for b in behaviors]
154
+ return gr.Dropdown(choices=choices, value=None, interactive=True), gr.CheckboxGroup(choices=behavior_bank, value=behavior_bank, interactive=True), "", "", ""
155
+
156
+ def add_to_behaivor_bank(selected_behavior, checkbox_group_values):
157
+ global behavior_bank
158
+ if selected_behavior and selected_behavior not in checkbox_group_values:
159
+ checkbox_group_values.append(selected_behavior)
160
+ behavior_bank = checkbox_group_values
161
+ return gr.CheckboxGroup(choices=checkbox_group_values, value=checkbox_group_values, interactive=True), gr.Dropdown(value=None,interactive=True)
162
+
163
+ def delete_behavior(selected_behavior, checkbox_group_values):
164
+ global behaviors, behavior_bank
165
+ behaviors = [b for b in behaviors if b[0] != selected_behavior]
166
+ behavior_bank = [b for b in behavior_bank if b != selected_behavior]
167
+ updated_choices = [b[0] for b in behaviors]
168
+ updated_checkbox_group = [cb for cb in checkbox_group_values if cb != selected_behavior]
169
+ return gr.Dropdown(choices=updated_choices, value=None, interactive=True), gr.CheckboxGroup(choices=updated_checkbox_group, value=updated_checkbox_group, interactive=True)
170
+
171
+ def edit_behavior(selected_behavior):
172
+ for name, (definition, timestamps) in behaviors:
173
+ if name == selected_behavior:
174
+ # Return values to populate textboxes
175
+ return name, definition, timestamps
176
+ return "", "", ""
177
+
178
+
179
+ welcome_message = """
180
+ Hello! I'm your AI assistant.
181
+ I can help you analyze your video sessions following your instructions.
182
+ To get started, please upload a video or add your behaviors to the Behavior Bank using the Behavior Manager.
183
+ """
184
+ #If you want to tell me about the people in the video, please name them starting from left to right.
185
+
186
+ css="""
187
+ body {
188
+ background-color: #edf1fa; /* offwhite */
189
+ }
190
+ .gradio-container {
191
+ background-color: #edf1fa; /* offwhite */
192
+ }
193
+ .column-form .wrap {
194
+ flex-direction: column;
195
+ }
196
+ .sidebar {
197
+ background: #ffffff;
198
+ padding: 10px;
199
+ border-right: 1px solid #dee2e6;
200
+ }
201
+ .content {
202
+ padding: 10px;
203
+ }
204
+ """
205
+
206
+ '''
207
+ Gradio Demo
208
+ '''
209
+ with gr.Blocks(theme='base', css=css, title="Soap.AI") as demo:
210
+ gr.Markdown("# 🤖 AI-Supported SOAP Generation")
211
+
212
+ with gr.Row():
213
+ with gr.Column():
214
+ video = gr.Video(label="Video", visible=True, height=360, container=True)
215
+ with gr.Row():
216
+ with gr.Column(min_width=1, scale=1):
217
+ video_upload_button = gr.Button("Analyze Video", variant="primary")
218
+ with gr.Column(min_width=1, scale=1):
219
+ example_video_button = gr.Button("Load Example Video")
220
+
221
+ video_upload_button.click(handle_video, inputs=video, outputs=video)
222
+ example_video_button.click(handle_video, None, outputs=video)
223
+
224
+ with gr.Column():
225
+ chat_section = gr.Group(visible=True)
226
+ with chat_section:
227
+ chatbot = gr.Chatbot(elem_id="chatbot",
228
+ container=True,
229
+ likeable=True,
230
+ value=[[None, welcome_message]],
231
+ avatar_images=(None, "./avatar.webp"))
232
+ with gr.Row():
233
+ txt = gr.Textbox(show_label=False, placeholder="Type here!")
234
+ with gr.Row():
235
+ send_btn = gr.Button("Send Message", elem_id="send-btn", variant="primary")
236
+ clear_btn = gr.Button("Clear Chat", elem_id="clear-btn")
237
+
238
+ with gr.Row():
239
+ behaivor_bank = gr.CheckboxGroup(label="Behavior Bank",
240
+ choices=[],
241
+ interactive=True,
242
+ info="A space to store all the behaviors you want to analyze.")
243
+ open_sidebar_btn = gr.Button("Show Behavior Manager", scale=0)
244
+ close_sidebar_btn = gr.Button("Hide Behavior Manager", visible=False, scale=0)
245
+
246
+ txt.submit(user_input, [txt, chatbot], [txt, chatbot], queue=False).then(
247
+ bot_response, chatbot, chatbot)
248
+ send_btn.click(user_input, [txt, chatbot], [txt, chatbot], queue=False).then(
249
+ bot_response, chatbot, chatbot)
250
+ clear_btn.click(lambda: None, None, chatbot, queue=False)
251
+
252
+ # Define a sidebar column that is initially hidden
253
+ with gr.Column(visible=False, min_width=200, scale=0.5, elem_classes="sidebar") as sidebar:
254
+ behavior_dropdown = gr.Dropdown(label="Behavior Collection",
255
+ choices=behaviors,
256
+ interactive=True,
257
+ container=True,
258
+ elem_classes="column-form",
259
+ info="Choose a behavior to add to the bank, edit or remove.")
260
+ with gr.Row():
261
+ add_toBank_button = gr.Button("Add Behavior to Bank", variant="primary")
262
+ edit_button = gr.Button("Edit Behavior")
263
+ delete_button = gr.Button("Remove Behavior")
264
+
265
+ with gr.Row():
266
+ name_input = gr.Textbox(label="Behavior Name",
267
+ placeholder="(e.g., IBR)",
268
+ info="The name you give to the specific behavior you're tracking or analyzing.")
269
+ timestamps_input = gr.Textbox(label="Timestamps MM:SS",
270
+ placeholder="(e.g., (01:15,01:35) )",
271
+ info="The exact times during a session when you saw the behavior. The first two digits represent minutes and the last two digits represent seconds.")
272
+ definition_input = gr.Textbox(lines=3,
273
+ label="Behavior Definition",
274
+ placeholder="(e.g., the child's skill in using behavior(s) to elicit aid in obtaining an object, or object related event)",
275
+ info="Provide a clear definition of the behavior.")
276
+
277
+ with gr.Row():
278
+ submit_button = gr.Button("Save Behavior", variant="primary")
279
+
280
+ submit_button.click(fn=add_or_update_behavior,
281
+ inputs=[name_input, definition_input, timestamps_input, behavior_dropdown],
282
+ outputs=[behavior_dropdown, behaivor_bank, name_input, definition_input, timestamps_input])
283
+
284
+ add_toBank_button.click(fn=add_to_behaivor_bank,
285
+ inputs=[behavior_dropdown, behaivor_bank],
286
+ outputs=[behaivor_bank, behavior_dropdown])
287
+
288
+ delete_button.click(fn=delete_behavior,
289
+ inputs=[behavior_dropdown, behaivor_bank],
290
+ outputs=[behavior_dropdown, behaivor_bank])
291
+
292
+ edit_button.click(fn=edit_behavior,
293
+ inputs=[behavior_dropdown],
294
+ outputs=[name_input, definition_input, timestamps_input])
295
+
296
+ # Function to open the sidebar
297
+ open_sidebar_btn.click(lambda: {
298
+ open_sidebar_btn: gr.Button(visible=False),
299
+ close_sidebar_btn: gr.Button(visible=True),
300
+ sidebar: gr.Column(visible=True)
301
+ }, outputs=[open_sidebar_btn, close_sidebar_btn, sidebar])
302
+
303
+ # Function to close the sidebar
304
+ close_sidebar_btn.click(lambda: {
305
+ open_sidebar_btn: gr.Button(visible=True),
306
+ close_sidebar_btn: gr.Button(visible=False),
307
+ sidebar: gr.Column(visible=False)
308
+ }, outputs=[open_sidebar_btn, close_sidebar_btn, sidebar])
309
+
310
+ # Launch the demo
311
+ demo.launch(share=True)
requirements.txt CHANGED
@@ -1 +1,166 @@
1
- huggingface_hub==0.22.2
1
+ aiofiles==23.2.1
2
+ aiohttp==3.9.5
3
+ aiosignal==1.3.1
4
+ alembic==1.13.2
5
+ altair==5.3.0
6
+ annotated-types==0.7.0
7
+ antlr4-python3-runtime==4.9.3
8
+ anyio==4.4.0
9
+ asteroid-filterbanks==0.4.0
10
+ async-timeout==4.0.3
11
+ attrs==23.2.0
12
+ audioread==3.0.1
13
+ av==11.0.0
14
+ beautifulsoup4==4.12.3
15
+ certifi==2024.7.4
16
+ cffi==1.16.0
17
+ charset-normalizer==3.3.2
18
+ click==8.1.7
19
+ colorama==0.4.6
20
+ coloredlogs==15.0.1
21
+ colorlog==6.8.2
22
+ contourpy==1.2.1
23
+ ctranslate2==4.3.1
24
+ cycler==0.12.1
25
+ decorator==4.4.2
26
+ distro==1.9.0
27
+ dnspython==2.6.1
28
+ docopt==0.6.2
29
+ einops==0.8.0
30
+ email_validator==2.2.0
31
+ exceptiongroup==1.2.1
32
+ fastapi==0.111.0
33
+ fastapi-cli==0.0.4
34
+ faster-whisper==1.0.0
35
+ ffmpy==0.3.2
36
+ filelock==3.15.4
37
+ flatbuffers==24.3.25
38
+ fonttools==4.53.1
39
+ frozenlist==1.4.1
40
+ fsspec==2024.6.1
41
+ google==3.0.0
42
+ gradio==4.37.2
43
+ gradio_client==1.0.2
44
+ greenlet==3.0.3
45
+ h11==0.14.0
46
+ httpcore==1.0.5
47
+ httptools==0.6.1
48
+ httpx==0.27.0
49
+ huggingface-hub==0.23.4
50
+ humanfriendly==10.0
51
+ HyperPyYAML==1.2.2
52
+ idna==3.7
53
+ imageio==2.34.2
54
+ imageio-ffmpeg==0.5.1
55
+ importlib_resources==6.4.0
56
+ intel-openmp==2021.4.0
57
+ Jinja2==3.1.4
58
+ joblib==1.4.2
59
+ jsonschema==4.23.0
60
+ jsonschema-specifications==2023.12.1
61
+ julius==0.2.7
62
+ kiwisolver==1.4.5
63
+ lazy_loader==0.4
64
+ librosa==0.10.2.post1
65
+ lightning==2.3.3
66
+ lightning-utilities==0.11.3.post0
67
+ llvmlite==0.43.0
68
+ Mako==1.3.5
69
+ markdown-it-py==3.0.0
70
+ MarkupSafe==2.1.5
71
+ matplotlib==3.9.1
72
+ mdurl==0.1.2
73
+ mkl==2021.4.0
74
+ moviepy==1.0.3
75
+ mpmath==1.3.0
76
+ msgpack==1.0.8
77
+ multidict==6.0.5
78
+ networkx==3.2.1
79
+ nltk==3.8.1
80
+ numba==0.60.0
81
+ numpy==1.26.4
82
+ omegaconf==2.3.0
83
+ onnxruntime==1.18.1
84
+ openai==1.35.12
85
+ opencv-python==4.10.0.84
86
+ optuna==3.6.1
87
+ orjson==3.10.6
88
+ packaging==24.1
89
+ pandas==2.2.2
90
+ pillow==10.4.0
91
+ platformdirs==4.2.2
92
+ plotly==5.22.0
93
+ pooch==1.8.2
94
+ primePy==1.3
95
+ proglog==0.1.10
96
+ protobuf==5.27.2
97
+ pyannote.audio==3.1.1
98
+ pyannote.core==5.0.0
99
+ pyannote.database==5.1.0
100
+ pyannote.metrics==3.2.1
101
+ pyannote.pipeline==3.0.1
102
+ pycparser==2.22
103
+ pydantic==2.8.2
104
+ pydantic_core==2.20.1
105
+ pydub==0.25.1
106
+ Pygments==2.18.0
107
+ pyparsing==3.1.2
108
+ pyreadline3==3.4.1
109
+ python-dateutil==2.9.0.post0
110
+ python-dotenv==1.0.1
111
+ python-multipart==0.0.9
112
+ pytorch-lightning==2.3.3
113
+ pytorch-metric-learning==2.5.0
114
+ pytz==2024.1
115
+ PyYAML==6.0.1
116
+ referencing==0.35.1
117
+ regex==2024.5.15
118
+ requests==2.32.3
119
+ rich==13.7.1
120
+ rpds-py==0.19.0
121
+ ruamel.yaml==0.18.6
122
+ ruamel.yaml.clib==0.2.8
123
+ ruff==0.5.1
124
+ safetensors==0.4.3
125
+ scikit-learn==1.5.1
126
+ scipy==1.13.1
127
+ semantic-version==2.10.0
128
+ semver==3.0.2
129
+ sentencepiece==0.2.0
130
+ shellingham==1.5.4
131
+ six==1.16.0
132
+ sniffio==1.3.1
133
+ sortedcontainers==2.4.0
134
+ soundfile==0.12.1
135
+ soupsieve==2.5
136
+ soxr==0.3.7
137
+ speechbrain==1.0.0
138
+ SQLAlchemy==2.0.31
139
+ starlette==0.37.2
140
+ sympy==1.13.0
141
+ tabulate==0.9.0
142
+ tbb==2021.13.0
143
+ tenacity==8.5.0
144
+ tensorboardX==2.6.2.2
145
+ threadpoolctl==3.5.0
146
+ tokenizers==0.15.2
147
+ tomlkit==0.12.0
148
+ toolz==0.12.1
149
+ torch==2.3.1
150
+ torch-audiomentations==0.11.1
151
+ torch-pitch-shift==1.2.4
152
+ torchaudio==2.3.1
153
+ torchmetrics==1.4.0.post0
154
+ tqdm==4.66.4
155
+ transformers==4.39.3
156
+ typer==0.12.3
157
+ typing_extensions==4.12.2
158
+ tzdata==2024.1
159
+ ujson==5.10.0
160
+ urllib3==2.2.2
161
+ uvicorn==0.30.1
162
+ watchfiles==0.22.0
163
+ websockets==11.0.3
164
+ whisperx==3.1.1
165
+ yarl==1.9.4
166
+ zipp==3.19.2
style.css ADDED
@@ -0,0 +1,50 @@
 
1
+ body {
2
+ font-family: Arial, sans-serif;
3
+ color: #000; /* Set default text color to black */
4
+ }
5
+
6
+ .gradio-container {
7
+ background-color: #1a3e63; /* Background color similar to the image */
8
+ }
9
+
10
+ .gradio-block, .gradio-column, .gradio-row {
11
+ background-color: #fff; /* White background for blocks */
12
+ border-radius: 10px; /* Rounded corners */
13
+ box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1); /* Subtle shadow for depth */
14
+ margin: 10px;
15
+ padding: 20px;
16
+ }
17
+
18
+ .gradio-markdown {
19
+ color: #000; /* Text color set to black */
20
+ }
21
+
22
+ .gradio-button {
23
+ background-color: #4CAF50; /* Button color */
24
+ color: white; /* Button text color */
25
+ border: none;
26
+ padding: 10px 20px;
27
+ text-align: center;
28
+ text-decoration: none;
29
+ display: inline-block;
30
+ font-size: 16px;
31
+ margin: 4px 2px;
32
+ cursor: pointer;
33
+ border-radius: 5px; /* Rounded corners */
34
+ }
35
+
36
+ .gradio-checkbox-group, .gradio-checkbox {
37
+ color: #000; /* Text color set to black */
38
+ }
39
+
40
+ .gradio-textbox {
41
+ color: #000; /* Text color set to black */
42
+ }
43
+
44
+ .gradio-header {
45
+ color: #000; /* Text color set to black */
46
+ }
47
+
48
+ .gradio-container p, .gradio-container h1, .gradio-container h2, .gradio-container h3, .gradio-container h4, .gradio-container h5, .gradio-container h6 {
49
+ color: #000; /* Ensure all header and paragraph texts are black */
50
+ }