Spaces:

Mahavaury2
/

llama2-consent-chatbot

Runtime error

App Files Files Community

Mahavaury2 committed on Jan 20

Commit

308a509

verified ·

1 Parent(s): 6f01f8a

Create app.py

Browse files

Files changed (1) hide show

app.py +63 -0

app.py ADDED Viewed

	@@ -0,0 +1,63 @@

+import gradio as gr
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
+# MODEL REPO
+MODEL_NAME = "mistralai/Mistral-7B-v0.1"
+# Load tokenizer
+print("Loading tokenizer...")
+tokenizer = AutoTokenizer.from_pretrained(
+    MODEL_NAME,
+    trust_remote_code=True
+)
+# Load model in 4-bit on CPU
+# (Even though we set device_map="auto", on a free Space there's no GPU, so it stays on CPU.)
+print("Loading model in 4-bit...")
+model = AutoModelForCausalLM.from_pretrained(
+    MODEL_NAME,
+    torch_dtype=torch.float16,
+    device_map="auto",        # auto-detect available devices
+    load_in_4bit=True,        # bitsandbytes for quantization
+    trust_remote_code=True    # Mistral uses custom code
+)
+model.eval()
+def chat_mistral(prompt):
+    """
+    Generates a response from Mistral 7B given a user prompt.
+    """
+    # Tokenize
+    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+    # Generate
+    with torch.no_grad():
+        outputs = model.generate(
+            **inputs,
+            max_new_tokens=128,       # limit output length to avoid OOM
+            temperature=0.7,
+            repetition_penalty=1.1
+        )
+    # Decode
+    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+    return response
+# Create a Gradio interface
+demo = gr.Interface(
+    fn=chat_mistral,
+    inputs=gr.Textbox(lines=3, label="Your Prompt"),
+    outputs=gr.Textbox(label="Mistral 7B Response"),
+    title="Mistral 7B (4-bit) Chat",
+    description=(
+        "A minimal Mistral-7B demo running on free CPU. "
+        "Inference will be slow and might run out of memory. "
+        "Use short prompts!"
+    )
+)
+# Launch the Gradio app
+if __name__ == "__main__":
+    demo.launch()