Spaces:
Runtime error
Runtime error
Update app.py
Browse files
Add `bfloat16` support for lighter (and maybe faster) inference. I used to add this argument on `pipeline`; see for example https://gist.github.com/younesbelkada/dba25f75d3749b4e2d2d4821f0d6f385#file-benchmark-py-L42
app.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
| 1 |
import os
|
| 2 |
import gradio as gr
|
|
|
|
| 3 |
import numpy as np
|
| 4 |
from transformers import pipeline
|
| 5 |
|
|
@@ -7,8 +8,8 @@ import torch
|
|
| 7 |
print(f"Is CUDA available: {torch.cuda.is_available()}")
|
| 8 |
print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
|
| 9 |
|
| 10 |
-
pipe_flan = pipeline("text2text-generation", model="google/flan-t5-xl", device="cuda:0")
|
| 11 |
-
pipe_vanilla = pipeline("text2text-generation", model="t5-large", device="cuda:0")
|
| 12 |
|
| 13 |
examples = [
|
| 14 |
["Please answer to the following question. Who is going to be the next Ballon d'or?"],
|
|
|
|
| 1 |
import os
|
| 2 |
import gradio as gr
|
| 3 |
+
import torch
|
| 4 |
import numpy as np
|
| 5 |
from transformers import pipeline
|
| 6 |
|
|
|
|
| 8 |
print(f"Is CUDA available: {torch.cuda.is_available()}")
|
| 9 |
print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
|
| 10 |
|
| 11 |
+
pipe_flan = pipeline("text2text-generation", model="google/flan-t5-xl", device="cuda:0", model_kwargs={"torch_dtype":torch.bfloat16})
|
| 12 |
+
pipe_vanilla = pipeline("text2text-generation", model="t5-large", device="cuda:0", model_kwargs={"torch_dtype":torch.bfloat16})
|
| 13 |
|
| 14 |
examples = [
|
| 15 |
["Please answer to the following question. Who is going to be the next Ballon d'or?"],
|