"""Gradio chat UI backed by a 4-bit quantized Mistral 7B text-generation pipeline."""

from typing import Optional

import gradio as gr
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    pipeline,
)

# NOTE(review): the UI title below says "Instruct", but this id is the base
# (non-instruct) checkpoint -- confirm which model is actually intended.
model_id = "mistralai/Mistral-7B-v0.1"

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype=torch.float16,
    # Passing ``load_in_4bit=True`` directly to ``from_pretrained`` is the
    # deprecated spelling; the quantization config object is the supported one.
    quantization_config=BitsAndBytesConfig(load_in_4bit=True),
)
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)


def chat(prompt: str, history: Optional[list] = None) -> str:
    """Generate a reply to *prompt* with the module-level pipeline.

    Args:
        prompt: The latest user message.
        history: Earlier turns supplied by ``gr.ChatInterface``. Accepted for
            interface compatibility but not used: every reply is generated
            from the latest message alone.

    Returns:
        The newly generated text, without the prompt echoed back.
    """
    output = pipe(
        prompt,
        max_new_tokens=200,
        do_sample=True,
        temperature=0.7,
        # By default the pipeline returns prompt + completion, which would make
        # the bot repeat the user's message at the start of every reply.
        return_full_text=False,
    )
    return output[0]["generated_text"]


demo = gr.ChatInterface(
    fn=chat,
    title="🧠 Mistral 7B Instruct Chatbot",
    description="This chatbot is powered by the open-source Mistral 7B LLM. Ask anything!",
    theme="soft",
)
demo.launch()