chore: faster speech-to-text
Browse files
app.py
CHANGED
|
@@ -194,13 +194,15 @@ def browser_automation(original_user_query:str)->str:
|
|
| 194 |
print("vision_web_browser.py: ", result.stderr)
|
| 195 |
return result.stdout
|
| 196 |
|
| 197 |
-
|
| 198 |
text_to_speech_pipe = pipeline(
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
|
|
|
| 202 |
)
|
| 203 |
-
|
|
|
|
| 204 |
|
| 205 |
def speech_to_text(final_answer_text, agent_memory):
|
| 206 |
text = f"[clears throat] {final_answer_text}"
|
|
|
|
| 194 |
print("vision_web_browser.py: ", result.stderr)
|
| 195 |
return result.stdout
|
| 196 |
|
| 197 |
+
|
| 198 |
# Build the module-level Bark text-to-speech pipeline once at import time.
# Device and precision are chosen together: float16 is only used on the CUDA
# path, because half-precision inference on CPU is slow and some ops are not
# implemented for it.
_tts_on_gpu = torch.cuda.is_available()
text_to_speech_pipe = pipeline(
    task="text-to-speech",
    model="suno/bark-small",
    device=0 if _tts_on_gpu else "cpu",
    torch_dtype=torch.float16 if _tts_on_gpu else torch.float32,
)
if _tts_on_gpu:
    # Bark's CPU offload moves sub-models between CPU and a CUDA device, so
    # it is only meaningful (and only safe to call) when a GPU is present.
    text_to_speech_pipe.model.enable_cpu_offload()
# NOTE(review): assigning this attribute after the model is loaded likely does
# not switch the attention implementation; flash attention is normally
# requested at load time (e.g. `attn_implementation="flash_attention_2"`).
# Kept as-is to preserve current behavior -- confirm and move to load time.
text_to_speech_pipe.model.use_flash_attention_2 = True
|
| 206 |
|
| 207 |
def speech_to_text(final_answer_text, agent_memory):
|
| 208 |
text = f"[clears throat] {final_answer_text}"
|