import gradio as gr | |
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline | |
import torch | |
model_id = "mistralai/Mistral-7B-Instruct-v0.1" | |
tokenizer = AutoTokenizer.from_pretrained(model_id) | |
model = AutoModelForCausalLM.from_pretrained( | |
model_id, | |
device_map="auto", | |
torch_dtype=torch.float16, | |
load_in_4bit=True | |
) | |
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer) | |
def chat(prompt, history=[]): | |
full_prompt = prompt | |
output = pipe(full_prompt, max_new_tokens=200, do_sample=True, temperature=0.7) | |
return output[0]["generated_text"] | |
gr.ChatInterface( | |
fn=chat, | |
title="🧠 Mistral 7B Instruct Chatbot", | |
description="This chatbot is powered by the open-source Mistral 7B LLM. Ask anything!", | |
theme="soft" | |
).launch() | |