freddyaboulton committed
Commit 6d7b9dd
Parent(s): 449d4d5
Files changed (1):
  1. app.py +5 -6
app.py CHANGED
@@ -10,7 +10,8 @@ import librosa
 
 pipe = transformers.pipeline(model='fixie-ai/ultravox-v0_4_1-llama-3_1-8b', trust_remote_code=True,
                              device=torch.device('cuda'))
-
+whisper = transformers.pipeline(model="openai/whisper-large-v3-turbo",
+                                device=torch.device('cuda'))
 
 account_sid = os.environ.get("TWILIO_ACCOUNT_SID")
 auth_token = os.environ.get("TWILIO_AUTH_TOKEN")
@@ -35,16 +36,14 @@ def transcribe(audio: tuple[int, np.ndarray], conversation: list[dict], gradio_c
 
     audio_sr = librosa.resample(audio[1].astype(np.float32) / 32768.0,
                                 orig_sr=original_sr, target_sr=target_sr)
-    inputs = pipe.preprocess({"audio": audio_sr, "turns": conversation, "sampling_rate": target_sr})
-    print("preprocess", inputs)
 
     output = pipe({"audio": audio_sr, "turns": conversation, "sampling_rate": target_sr},
                   max_new_tokens=512)
-    print("output", output)
+    transcription = whisper({"array": audio_sr, "sampling_rate": target_sr})
 
-    conversation.append({"role": "user", "content": "hi"})
+    conversation.append({"role": "user", "content": transcription})
     conversation.append({"role": "assistant", "content": output})
-    gradio_convo.append({"role": "user", "content": "hi"})
+    gradio_convo.append({"role": "user", "content": transcription})
     gradio_convo.append({"role": "assistant", "content": output})
 
     yield AdditionalOutputs(conversation, gradio_convo)
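
For reference, a minimal sketch (not part of the commit) of how the new whisper pipeline call is typically consumed, assuming the standard transformers ASR pipeline shapes: a dict with "array" and "sampling_rate" as input, and a dict with a "text" key as output. The helper name transcribe_user_turn and the 16 kHz default are illustrative assumptions, not code from this repo.

# Sketch under the assumptions above: extract the transcription string from the
# ASR pipeline's dict output before storing it as a chat "content" value.
import numpy as np
import torch
import transformers

whisper = transformers.pipeline(model="openai/whisper-large-v3-turbo",
                                device=torch.device("cuda"))

def transcribe_user_turn(audio_sr: np.ndarray, target_sr: int = 16000) -> str:
    # The ASR pipeline returns e.g. {"text": " hello there"}; keep only the string.
    result = whisper({"array": audio_sr, "sampling_rate": target_sr})
    return result["text"].strip()

# Usage in a handler like the one above (hypothetical):
# conversation.append({"role": "user", "content": transcribe_user_turn(audio_sr)})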