import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# 1) Define pastel gradient CSS
css = """
.gradio-container {
    background: linear-gradient(to right, #FFDEE9, #B5FFFC);
}
"""

title = "Bonjour Dans le chat du consentement"

# 2) Load the Mistral model & tokenizer from the HF Hub
model_id = "mistralai/Mistral-7B-Instruct-v0.3"

# If you're on a GPU Space, you can use:
#   device_map = "auto"
#   torch_dtype = torch.bfloat16
# If you're on a CPU-only Space, remove those arguments or set device_map="cpu"
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",           # "auto" if you have a GPU
    torch_dtype=torch.bfloat16,  # for GPU; remove or use float32 on CPU
    trust_remote_code=True
)

# 3) Create a text-generation pipeline
generate_text = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=512,   # adjust as needed
    temperature=0.7,  # adjust as needed
    do_sample=True
)

def mistral_inference(prompt):
    """
    Passes the user prompt to the pipeline and returns the generated text.
    Output length and sampling are controlled by the pipeline settings above.
    """
    # The pipeline returns a list of dicts: [{"generated_text": "..."}]
    outputs = generate_text(prompt)
    text_out = outputs[0]["generated_text"]
    return text_out

# 4) Build the Gradio interface with a pastel background & greeting
with gr.Blocks(css=css) as demo:
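    # Minimal interface sketch, assuming a simple prompt/response layout:
    # a header showing the title, a prompt box, an output box, and a button
    # wired to mistral_inference. The header markup, component labels, and
    # layout below are illustrative; adjust them as needed.
    gr.Markdown(f"<h1 style='text-align: center;'>{title}</h1>")

    with gr.Row():
        user_prompt = gr.Textbox(label="Prompt", lines=4, placeholder="Type your message here...")
        model_output = gr.Textbox(label="Response", lines=4)

    generate_btn = gr.Button("Generate")
    generate_btn.click(fn=mistral_inference, inputs=user_prompt, outputs=model_output)

demo.launch()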