import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel

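# Base checkpoint and the LoRA adapter repo (the repo name suggests an AutoTrain run).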
BASE_MODEL = "bigcode/starcoder2-3b"
ADAPTER_REPO = "simnJS/autotrain-fxp6j-p5s8i"

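# Load the tokenizer that matches the base model.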
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)

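# Load the base model in half precision; device_map="auto" lets Accelerate
# place the weights on the available GPU(s) or fall back to CPU.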
base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    torch_dtype=torch.float16,
    device_map="auto",
)

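# Attach the LoRA adapter weights on top of the frozen base model.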
model = PeftModel.from_pretrained(
    base_model,
    ADAPTER_REPO,
    torch_dtype=torch.float16,
)
model.eval()  # inference only: disable dropout in the adapter layers


def generate_answer(user_message, history):
    """
    user_message: the user's latest message
    history: list of (user_message, model_answer) tuples
    """
    prompt = user_message

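    # A possible extension (sketch only, not what this demo does): fold the
    # earlier turns into the prompt so the model sees the conversation, e.g.
    #   prompt = "".join(f"User: {u}\nBot: {b}\n" for u, b in history)
    #   prompt += f"User: {user_message}\nBot:"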
    # Tokenize the prompt and move the tensors to the model's device.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # Sample up to 100 new tokens with nucleus sampling.
    outputs = model.generate(
        **inputs,
        max_new_tokens=100,
        temperature=0.7,
        do_sample=True,
        top_p=0.9,
        pad_token_id=tokenizer.eos_token_id,  # silence the missing-pad-token warning
    )
    # generate() returns the prompt followed by the completion, so strip the
    # prompt tokens before decoding; otherwise the user's message is echoed
    # back at the start of every answer.
    new_tokens = outputs[0][inputs["input_ids"].shape[1]:]
    answer = tokenizer.decode(new_tokens, skip_special_tokens=True)

    history.append((user_message, answer))
    return history, history


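# Minimal Gradio UI: a chat window, a textbox, and per-session state that
# holds the (user, bot) message pairs.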
with gr.Blocks() as demo:
    gr.Markdown("# Chat with my Verse LoRA model")
    chatbot = gr.Chatbot()
    msg = gr.Textbox(label="Type your message here...")
    state = gr.State([])

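    # Thin wrapper so the event handler has an explicit name; it simply
    # delegates to generate_answer.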
    def submit_message(user_message, history):
        return generate_answer(user_message, history)

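    # On Enter, run the model and refresh both the chat window and the state.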
    msg.submit(submit_message, inputs=[msg, state], outputs=[chatbot, state])


demo.launch()