Spaces: Running on Zero
import gradio as gr
from transformers import pipeline
import torch
import spaces
import os
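
# On ZeroGPU Spaces, a GPU is attached only for the duration of a @spaces.GPU-decorated call.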
@spaces.GPU
def load_model(model_name):
    # "token" is a Space secret holding a Hugging Face access token (needed for the non-public models).
    return pipeline("text-generation", model=model_name, device="cuda",
                    torch_dtype=torch.bfloat16, trust_remote_code=True,
                    token=os.environ["token"])
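
# Note: generate() rebuilds the pipeline via load_model() on every request; nothing is
# cached between calls, so each generation pays the full model-loading cost.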
@spaces.GPU
def generate(
    model_name,
    user_input,
    temperature=0.4,
    top_p=0.95,
    top_k=50,
    max_new_tokens=256,
):
    pipe = load_model(model_name)
    # tau-0.5B does not use the ChatML template, so pass the raw prompt;
    # the other models expect ChatML-formatted input.
    if model_name == "M4-ai/tau-0.5B":
        prompt = user_input
    else:
        prompt = f"<|im_start|>user\n{user_input}<|im_end|>\n<|im_start|>assistant\n"
    outputs = pipe(prompt, max_new_tokens=max_new_tokens, do_sample=True,
                   temperature=temperature, top_k=top_k, top_p=top_p,
                   repetition_penalty=1.10)
    return outputs[0]["generated_text"]
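
# Hypothetical quick test of generate() outside the UI (assumes the "token" secret is set
# and a ZeroGPU slot is available):
#   print(generate("M4-ai/tau-0.5B", "Hello!", max_new_tokens=32))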
model_choices = [
    "Locutusque/Hyperion-3.0-Mixtral-3x7B",
    "M4-ai/NeuralReyna-Mini-1.8B-v0.2",
    "aloobun/Cypher-Laser-Mixtral-2x1.8B-v0.1",
    "Locutusque/NeuralHyperion-Medium-Preview",
    "aloobun/Cypher-Mini-1.8B",
    "Locutusque/Hercules-2.0-Qwen1.5-1.8B",
    "Locutusque/Hercules-2.5-Mistral-7B",
    "M4-ai/tau-0.5B",
]
# What are the best options?
g = gr.Interface(
    fn=generate,
    inputs=[
        gr.components.Dropdown(choices=model_choices, label="Model", value=model_choices[0], interactive=True),
        gr.components.Textbox(lines=2, label="Prompt", value="Write me a Python program that calculates the factorial of a given number."),
        gr.components.Slider(minimum=0, maximum=1, value=0.4, label="Temperature"),
        gr.components.Slider(minimum=0, maximum=1, value=0.95, label="Top p"),
        gr.components.Slider(minimum=0, maximum=100, step=1, value=50, label="Top k"),
        gr.components.Slider(minimum=1, maximum=2048, step=1, value=1024, label="Max tokens"),
    ],
    outputs=[gr.Textbox(lines=10, label="Output")],
    title="Locutusque's Language Models",
    description="Try out Locutusque's (or others') language models here! Credit goes to Mediocreatmybest for this space. You may also find some experimental preview models here that have not been released publicly.",
    concurrency_limit=1,
)
g.launch(max_threads=4)