# NOTE(review): the lines that were here were Hugging Face file-viewer
# residue (status text, commit hashes, gutter line numbers) accidentally
# captured with the source. They were not valid Python and broke the
# module at import time, so they are reduced to this comment.
import os
import spaces
import torch
import gradio as gr
# Module-level device probe.
# NOTE(review): on HF ZeroGPU Spaces, CUDA is only attached inside
# @spaces.GPU-decorated functions, so this tensor reports 'cpu' at import
# time even though .cuda() succeeds there. Guard the call so the script
# also runs on ordinary machines without a GPU instead of crashing.
zero = torch.tensor([0])  # torch.tensor is the modern constructor (torch.Tensor is legacy)
if torch.cuda.is_available():
    zero = zero.cuda()
print(zero.device)  # <-- 'cpu' at import time on ZeroGPU 🤔
# gpu
@spaces.GPU
def greet(user):
    """Generate a completion for *user* with Mistral-7B-v0.2 via vLLM.

    Runs on a ZeroGPU worker: the heavy imports and model construction
    happen inside the @spaces.GPU scope, where CUDA is actually attached
    (the original comment notes zero.device reads 'cuda:0' here).

    Args:
        user: Raw prompt text from the Gradio textbox.

    Returns:
        The generated text for the first completion, or "" if the model
        produced no output (the original indexed blindly and would raise
        IndexError in that case).
    """
    # Lazy imports: keep module import cheap and load GPU-only deps in
    # the GPU-attached process. (Dropped the unused `login` import.)
    from vllm import SamplingParams, LLM
    from transformers.utils import move_cache
    from huggingface_hub import snapshot_download

    # Cache the engine on the function object so repeated calls don't
    # rebuild it from scratch — the original re-downloaded and
    # re-instantiated the 7B model on every request.
    model = getattr(greet, "_model", None)
    if model is None:
        LLM_MODEL_ID = "mistral-community/Mistral-7B-v0.2"
        fp = snapshot_download(LLM_MODEL_ID)
        move_cache()
        model = LLM(fp)
        greet._model = model

    sampling_params = SamplingParams(
        temperature=0.3,
        ignore_eos=False,
        max_tokens=1024,  # was int(512 * 2)
    )

    model_outputs = model.generate([user], sampling_params)
    # Flatten every completion of every request output (same traversal
    # as the original nested loops).
    generations = [
        completion.text
        for request_output in model_outputs
        for completion in request_output.outputs
    ]
    return generations[0] if generations else ""
# Wire the generator into a minimal Gradio UI: one textbox in, one out.
demo = gr.Interface(fn=greet, inputs=gr.Text(), outputs=gr.Text())
# NOTE(review): the original launch line ended with a stray "|"
# (copy/paste residue) — a SyntaxError; removed. share=True is ignored
# when running on HF Spaces (the app is already publicly hosted) but is
# harmless and useful for local runs, so it is kept.
demo.launch(share=True)