import subprocess
import os

# Install the Mamba kernels at startup; the *_SKIP_CUDA_BUILD flags skip CUDA
# kernel compilation (ZeroGPU Spaces have no GPU at import time), and the
# current environment is merged in so pip keeps PATH and friends.
subprocess.run('pip install mamba-ssm --no-build-isolation', env={**os.environ, 'MAMBA_SKIP_CUDA_BUILD': 'TRUE'}, shell=True)
subprocess.run('pip install causal-conv1d --no-build-isolation', env={**os.environ, 'CAUSAL_CONV1D_SKIP_CUDA_BUILD': 'TRUE'}, shell=True)

import gradio as gr
from huggingface_hub import snapshot_download
from pathlib import Path
import spaces
from mistral.cli.chat import load_model, generate_stream
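
# Fetch the model weights from the Hub into a local directory
# (files already present and unchanged are reused on later runs).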
mistral_models_path = Path.home().joinpath('mistral_models', 'mamba-codestral-7B-v0.1')
mistral_models_path.mkdir(parents=True, exist_ok=True)
snapshot_download(repo_id="mistralai/mamba-codestral-7B-v0.1",
                  allow_patterns=["params.json", "consolidated.safetensors", "tokenizer.model.v3"],
                  local_dir=mistral_models_path)
MODEL_PATH = str(mistral_models_path)

@spaces.GPU()
def generate_response(message, history):
    # ZeroGPU attaches a GPU only for the duration of this call, so the model
    # is loaded here rather than at import time.
    model = load_model(MODEL_PATH)
    # Flatten Gradio's [[user, assistant], ...] pair history into an
    # alternating role/content message list.
    history_mistral_format = [
        {"role": "user" if i % 2 == 0 else "assistant", "content": m}
        for i, m in enumerate(sum(history, []))
    ]
    history_mistral_format.append({"role": "user", "content": message})
    response = ""
    for chunk in generate_stream(model, history_mistral_format, max_tokens=256):
        response += chunk
    return response
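
# A possible streaming variant, left unwired below: gr.ChatInterface also
# accepts a generator handler, so yielding the growing string streams the
# reply to the chat window chunk by chunk. This is a sketch that assumes
# generate_stream yields text chunks exactly as used above.
@spaces.GPU()
def generate_response_streaming(message, history):
    model = load_model(MODEL_PATH)
    messages = [
        {"role": "user" if i % 2 == 0 else "assistant", "content": m}
        for i, m in enumerate(sum(history, []))
    ]
    messages.append({"role": "user", "content": message})
    response = ""
    for chunk in generate_stream(model, messages, max_tokens=256):
        response += chunk
        yield response  # partial text; Gradio re-renders on each yield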

# Gradio interface
def chat_interface(message, history):
    # generate_response takes only (message, history); the model is loaded
    # inside the GPU-decorated call, so it is not passed in here.
    return generate_response(message, history)

iface = gr.ChatInterface(
    chat_interface,
    title="Mamba Codestral Chat (ZeroGPU)",
    description="Chat with the Mamba Codestral 7B model using the Hugging Face Spaces ZeroGPU feature.",
)

if __name__ == "__main__":
    iface.launch()
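
# To run locally (assuming the same dependencies are installed), `python app.py`
# serves the UI on Gradio's default port, http://127.0.0.1:7860.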