# Hugging Face Space: Maestrale Chat v0.4 beta — runs on ZeroGPU.
import gradio as gr
import torch
from transformers import (
AutoModelForCausalLM,
AutoTokenizer,
TextIteratorStreamer,
)
import os
from threading import Thread
import spaces
import time
import subprocess
# HTML shown inside the empty chat window before the first message:
# a banner image plus an Italian prompt ("Fammi una domanda!" = "Ask me a question!").
PLACEHOLDER = """
<div style="padding: 40px; text-align: center; display: flex; flex-direction: column; align-items: center;">
<img src="https://i.imgur.com/yu0sVwC.png" style="width: 90%; max-width: 650px; height: auto; opacity: 0.8; border-radius: 20px;">
<p style="font-size: 18px; margin-bottom: 2px; opacity: 0.65;">Fammi una domanda!</p>
</div>
"""

# Custom CSS applied to the ChatInterface: spreads message rows, rounds the
# message bubbles, and darkens user/assistant bubbles in dark mode.
css = """
.message-row {
justify-content: space-evenly !important;
}
.message-bubble-border {
border-radius: 6px !important;
}
.dark.message-bubble-border {
border-color: #21293b !important;
}
.dark.user {
background: #0a1120 !important;
}
.dark.assistant {
background: transparent !important;
}
"""

# HTML blurb rendered under the app title, linking to the model card.
DESCRIPTION = """<div>
<p>🇮🇹 Italian LLM <a href="https://huggingface.co/mii-llm/maestrale-chat-v0.4-beta"><b>Maestrale Chat v0.4 beta</b></a>. Maestrale is a powerful language model for Italian, trained by mii-llm, based on Mistral 7B.</p>
<p>🔎 For more details about Maestrale and how to use it with <code>transformers</code>, visit the <a href="https://huggingface.co/mii-llm/maestrale-chat-v0.4-beta">model card</a>.</p>
</div>"""
# One-time setup at import: load tokenizer + model, pick a device, and move
# the model there explicitly.
tokenizer = AutoTokenizer.from_pretrained("mii-llm/maestrale-chat-v0.4-beta")
# Load without device_map: combining device_map="auto" (accelerate dispatch
# hooks) with a later model.to(device) is contradictory — accelerate-dispatched
# models refuse .to() moves in recent versions. Placement is handled explicitly
# below instead.
model = AutoModelForCausalLM.from_pretrained(
    "mii-llm/maestrale-chat-v0.4-beta",
    torch_dtype=torch.bfloat16,
)

# Generation stops on either the tokenizer's EOS token or the ChatML
# end-of-turn marker "<|im_end|>".
terminators = [
    tokenizer.eos_token_id,
    tokenizer.convert_tokens_to_ids("<|im_end|>"),
]

if torch.cuda.is_available():
    device = torch.device("cuda")
    print(f"Using GPU: {torch.cuda.get_device_name(device)}")
else:
    device = torch.device("cpu")
    print("Using CPU")

model = model.to(device)
@spaces.GPU()
def chat(message, history, system, temperature, do_sample, max_tokens):
    """Stream an assistant reply for *message*, given the prior *history*.

    Builds a ChatML-style conversation (optional system prompt, alternating
    user/assistant turns from *history*, then the new user *message*),
    launches ``model.generate`` on a background thread, and yields the
    accumulated reply text as each new chunk arrives from the streamer.
    """
    conversation = []
    if system:
        conversation.append({"role": "system", "content": system})
    # history arrives as (user_message, assistant_message) pairs.
    for user_turn, assistant_turn in history:
        conversation.append({"role": "user", "content": user_turn})
        conversation.append({"role": "assistant", "content": assistant_turn})
    conversation.append({"role": "user", "content": message})

    prompt = tokenizer.apply_chat_template(
        conversation, tokenize=False, add_generation_prompt=True
    )
    model_inputs = tokenizer([prompt], return_tensors="pt").to(device)

    # Streamer hands back decoded text incrementally; the prompt itself and
    # special tokens are skipped so only new reply text is yielded.
    streamer = TextIteratorStreamer(
        tokenizer, timeout=20.0, skip_prompt=True, skip_special_tokens=True
    )
    generation_args = dict(
        model_inputs,
        streamer=streamer,
        max_new_tokens=max_tokens,
        do_sample=do_sample,
        temperature=temperature,
        eos_token_id=terminators,
        pad_token_id=tokenizer.eos_token_id,
    )

    # Generate on a worker thread so this generator can consume the streamer.
    Thread(target=model.generate, kwargs=generation_args).start()

    response = ""
    for chunk in streamer:
        response += chunk
        yield response
    # Final yield ensures the complete text is emitted once streaming ends.
    yield response
# Chat window: fixed height, per-message copy button, and the placeholder
# banner shown until the first message arrives.
chatbot = gr.Chatbot(
    height=550,
    placeholder=PLACEHOLDER,
    label='Conversazione',
    show_copy_button=True,
)

# Wire the streaming `chat` generator into a ChatInterface. The extra
# generation controls live in a collapsed "Parametri" accordion; their order
# must match chat()'s (system, temperature, do_sample, max_tokens) parameters.
demo = gr.ChatInterface(
    fn=chat,
    chatbot=chatbot,
    fill_height=True,
    theme=gr.themes.Soft(),
    css=css,
    additional_inputs_accordion=gr.Accordion(
        label="⚙️ Parametri", open=False, render=False
    ),
    additional_inputs=[
        gr.Textbox(
            label="System",
            value="Sei un assistente utile.",
        ),
        gr.Slider(
            minimum=0, maximum=1, step=0.1, value=0.7, label="Temperature", render=False
        ),
        gr.Checkbox(label="Sampling", value=True),
        gr.Slider(
            minimum=128,
            maximum=4096,
            step=1,
            value=768,
            label="Max new tokens",
            render=False,
        ),
    ],
    stop_btn="Stop Generation",
    cache_examples=False,
    title="Maestrale Chat v0.4 beta",
    description=DESCRIPTION,
)

# NOTE: the original file ended with a stray " |" scrape artifact after
# demo.launch(), which was a syntax error; it has been removed.
demo.launch()