import gradio as gr
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    pipeline,
)

# Hugging Face Hub checkpoint to load.
# NOTE(review): this is the base (non-instruct) checkpoint, while the UI title
# further down advertises "Mistral 7B Instruct" — confirm which is intended.
model_id = "mistralai/Mistral-7B-v0.1"

tokenizer = AutoTokenizer.from_pretrained(model_id)

# 4-bit quantization is requested through an explicit BitsAndBytesConfig;
# passing a bare `load_in_4bit=True` keyword to `from_pretrained` is
# deprecated. The compute dtype is set to match `torch_dtype` so the
# quantized linear layers do not fall back to the float32 default.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype=torch.float16,
    quantization_config=BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.float16,
    ),
)

# Shared text-generation pipeline used by the chat handler.
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

def chat(prompt, history=None):
    """Generate a reply to *prompt* with the module-level pipeline ``pipe``.

    Args:
        prompt: The latest user message.
        history: Earlier conversation turns supplied by the caller.
            Currently unused: each reply is generated from ``prompt``
            alone. Defaults to ``None`` instead of a shared mutable list.

    Returns:
        The newly generated text only; the prompt is not echoed back.
    """
    # By default the pipeline's `generated_text` begins with the prompt
    # itself, which would repeat the user's message at the start of every
    # reply. `return_full_text=False` returns only the completion.
    output = pipe(
        prompt,
        max_new_tokens=200,
        do_sample=True,
        temperature=0.7,
        return_full_text=False,
    )
    return output[0]["generated_text"]

# Build the chat UI around `chat`, then start the Gradio server.
demo = gr.ChatInterface(
    fn=chat,
    title="🧠 Mistral 7B Instruct Chatbot",
    description="This chatbot is powered by the open-source Mistral 7B LLM. Ask anything!",
    theme="soft",
)
demo.launch()