Spaces:
Paused
Paused
Commit · 6d7b9dd
1 Parent(s): 449d4d5
Add code
Browse files
app.py
CHANGED
@@ -10,7 +10,8 @@ import librosa
|
|
10 |
|
11 |
pipe = transformers.pipeline(model='fixie-ai/ultravox-v0_4_1-llama-3_1-8b', trust_remote_code=True,
|
12 |
device=torch.device('cuda'))
|
13 |
-
|
|
|
14 |
|
15 |
account_sid = os.environ.get("TWILIO_ACCOUNT_SID")
|
16 |
auth_token = os.environ.get("TWILIO_AUTH_TOKEN")
|
@@ -35,16 +36,14 @@ def transcribe(audio: tuple[int, np.ndarray], conversation: list[dict], gradio_c
|
|
35 |
|
36 |
audio_sr = librosa.resample(audio[1].astype(np.float32) / 32768.0,
|
37 |
orig_sr=original_sr, target_sr=target_sr)
|
38 |
-
inputs = pipe.preprocess({"audio": audio_sr, "turns": conversation, "sampling_rate": target_sr})
|
39 |
-
print("preprocess", inputs)
|
40 |
|
41 |
output = pipe({"audio": audio_sr, "turns": conversation, "sampling_rate": target_sr},
|
42 |
max_new_tokens=512)
|
43 |
-
|
44 |
|
45 |
-
conversation.append({"role": "user", "content":
|
46 |
conversation.append({"role": "assistant", "content": output})
|
47 |
-
gradio_convo.append({"role": "user", "content":
|
48 |
gradio_convo.append({"role": "assistant", "content": output})
|
49 |
|
50 |
yield AdditionalOutputs(conversation, gradio_convo)
|
|
|
10 |
|
11 |
pipe = transformers.pipeline(model='fixie-ai/ultravox-v0_4_1-llama-3_1-8b', trust_remote_code=True,
|
12 |
device=torch.device('cuda'))
|
13 |
+
whisper = transformers.pipeline(model="openai/whisper-large-v3-turbo",
|
14 |
+
device=torch.device('cuda'))
|
15 |
|
16 |
account_sid = os.environ.get("TWILIO_ACCOUNT_SID")
|
17 |
auth_token = os.environ.get("TWILIO_AUTH_TOKEN")
|
|
|
36 |
|
37 |
audio_sr = librosa.resample(audio[1].astype(np.float32) / 32768.0,
|
38 |
orig_sr=original_sr, target_sr=target_sr)
|
|
|
|
|
39 |
|
40 |
output = pipe({"audio": audio_sr, "turns": conversation, "sampling_rate": target_sr},
|
41 |
max_new_tokens=512)
|
42 |
+
transcription = whisper({"array": audio_sr, "sampling_rate": target_sr})
|
43 |
|
44 |
+
conversation.append({"role": "user", "content": transcription})
|
45 |
conversation.append({"role": "assistant", "content": output})
|
46 |
+
gradio_convo.append({"role": "user", "content": transcription})
|
47 |
gradio_convo.append({"role": "assistant", "content": output})
|
48 |
|
49 |
yield AdditionalOutputs(conversation, gradio_convo)
|