import os
import subprocess
from pathlib import Path

import gradio as gr
import spaces
from huggingface_hub import snapshot_download

# Install the Mamba kernels at startup, before importing the mistral inference helpers
# (which may depend on them). The SKIP_CUDA_BUILD flags avoid compiling the CUDA
# extensions on the GPU-less build host; the existing environment is preserved so pip
# stays on PATH.
subprocess.run('pip install mamba-ssm --no-build-isolation',
               env={**os.environ, 'MAMBA_SKIP_CUDA_BUILD': 'TRUE'}, shell=True)
subprocess.run('pip install causal-conv1d --no-build-isolation',
               env={**os.environ, 'CAUSAL_CONV1D_SKIP_CUDA_BUILD': 'TRUE'}, shell=True)

from mistral.cli.chat import load_model, generate_stream
# Download the Mamba Codestral weights and tokenizer into a local cache directory.
mistral_models_path = Path.home().joinpath('mistral_models', 'mamba-codestral-7B-v0.1')
mistral_models_path.mkdir(parents=True, exist_ok=True)

snapshot_download(repo_id="mistralai/mamba-codestral-7B-v0.1",
                  allow_patterns=["params.json", "consolidated.safetensors", "tokenizer.model.v3"],
                  local_dir=mistral_models_path)

MODEL_PATH = str(mistral_models_path)
@spaces.GPU()
def generate_response(message, history):
    # Load the model inside the GPU-decorated function so ZeroGPU can attach a GPU on demand.
    model = load_model(MODEL_PATH)

    # Flatten Gradio's [[user, assistant], ...] history pairs into the chat-message
    # format expected by the Mistral helpers, then append the new user message.
    history_mistral_format = [
        {"role": "user" if i % 2 == 0 else "assistant", "content": m}
        for i, m in enumerate(sum(history, []))
    ]
    history_mistral_format.append({"role": "user", "content": message})

    response = ""
    for chunk in generate_stream(model, history_mistral_format, max_tokens=256):
        response += chunk
    return response
# Gradio interface
def chat_interface(message, history):
    # The model is loaded inside generate_response, so no model argument is passed here.
    return generate_response(message, history)
iface = gr.ChatInterface(
    chat_interface,
    title="Mamba Codestral Chat (ZeroGPU)",
    description="Chat with the Mamba Codestral 7B model using the Hugging Face Spaces ZeroGPU feature.",
)
if __name__ == "__main__":
    iface.launch()