Spaces:

Mahavaury2
/

mistralai-Mistral-7B-Instruct-v0.3

Running

App Files Files Community

Mahavaury2 committed on Jan 22

Commit

5f14f54

verified ·

1 Parent(s): 29ac499

Update app.py

Browse files

Files changed (1) hide show

app.py +46 -25

app.py CHANGED Viewed

@@ -1,40 +1,61 @@
 import gradio as gr
-from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
-checkpoint = "mistralai/Mistral-7B-Instruct-v0.3"
-# Download tokenizer & model
-tokenizer = AutoTokenizer.from_pretrained(checkpoint, trust_remote_code=True)
-model = AutoModelForCausalLM.from_pretrained(
-    checkpoint,
-    device_map="auto",             # or "cpu" / "cuda"
-    trust_remote_code=True
-)
-# (Optional) set model to inference mode, etc.
-# model.eval()
-def inference_fn(prompt):
-    # Tokenize
-    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
-    # Generate
-    output_tokens = model.generate(**inputs, max_new_tokens=128)
-    # Decode
-    return tokenizer.decode(output_tokens[0], skip_special_tokens=True)
-# Pastel gradient CSS
 css = """
 .gradio-container {
     background: linear-gradient(to right, #FFDEE9, #B5FFFC);
 }
 """
 with gr.Blocks(css=css) as demo:
-    gr.Markdown("<h1 style='text-align: center;'>Bonjour Dans le chat du consentement</h1>")
     user_input = gr.Textbox(label="Entrez votre message ici:", lines=3)
     output = gr.Textbox(label="Réponse du Modèle:", lines=5)
     send_button = gr.Button("Envoyer")
-    send_button.click(fn=inference_fn, inputs=user_input, outputs=output)
-demo.launch()

 import gradio as gr
 import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+# 1) Pastel left-to-right gradient applied to the Gradio root container
 css = """
 .gradio-container {
     background: linear-gradient(to right, #FFDEE9, #B5FFFC);
 }
 """
+# Heading text; rendered as the page's <h1> in the UI below
+title = "Bonjour Dans le chat du consentement"
+# 2) Load the Mistral model & tokenizer from the Hugging Face Hub
+model_id = "mistralai/Mistral-7B-Instruct-v0.3"
+# The arguments below are the GPU-Space settings (device_map="auto",
+# torch_dtype=torch.bfloat16). On a CPU-only Space, remove them or set
+# device_map="cpu" and use float32 instead.
+# NOTE(review): trust_remote_code=True lets code shipped in the model repo
+# run at load time -- confirm this checkpoint actually needs it.
+tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
+model = AutoModelForCausalLM.from_pretrained(
+    model_id,
+    device_map="auto",          # "auto" if you have GPU
+    torch_dtype=torch.bfloat16, # for GPU. Remove or use float32 on CPU
+    trust_remote_code=True
+)
+# 3) Create a text-generation pipeline.
+# max_new_tokens bounds only the generated continuation; max_length would
+# also count the prompt tokens, so a long prompt would leave little or no
+# room for a reply.
+generate_text = pipeline(
+    "text-generation",
+    model=model,
+    tokenizer=tokenizer,
+    max_new_tokens=512,    # reply budget, independent of prompt length
+    temperature=0.7,       # sampling temperature (applies because do_sample=True)
+    do_sample=True,
+)
+def mistral_inference(prompt):
+    """Generate the model's reply to a user prompt.
+
+    Args:
+        prompt: Text typed by the user in the input box.
+
+    Returns:
+        The newly generated text only (the prompt is not echoed back), or an
+        empty string when the prompt is empty or whitespace.
+    """
+    # Nothing to answer: skip the expensive generation call.
+    if not prompt or not prompt.strip():
+        return ""
+    # The pipeline returns a list of dicts: [{"generated_text": "..."}].
+    # return_full_text=False drops the prompt prefix so the output box shows
+    # just the model's response.
+    outputs = generate_text(prompt, return_full_text=False)
+    return outputs[0]["generated_text"]
+# 4) Build the Gradio interface: pastel background (css) plus a centered
+# heading, an input box, an output box and a send button.
 with gr.Blocks(css=css) as demo:
+    gr.Markdown(f"<h1 style='text-align:center;'>{title}</h1>")
     user_input = gr.Textbox(label="Entrez votre message ici:", lines=3)
     output = gr.Textbox(label="Réponse du Modèle:", lines=5)
     send_button = gr.Button("Envoyer")
+    # On click, pass the input box text to mistral_inference and write the
+    # returned string into the output box.
+    send_button.click(fn=mistral_inference, inputs=user_input, outputs=output)
+# 5) Launch the app only when this file is run as a script
+if __name__ == "__main__":
+    demo.launch()