import os
import subprocess
from pathlib import Path

import gradio as gr
import spaces
from huggingface_hub import snapshot_download

# Install the Mamba kernels at startup, before importing the mistral inference helpers
# (which may depend on them). The SKIP_CUDA_BUILD flags avoid compiling the CUDA
# extensions on the GPU-less build host; the existing environment is preserved so pip
# stays on PATH.
subprocess.run('pip install mamba-ssm --no-build-isolation',
               env={**os.environ, 'MAMBA_SKIP_CUDA_BUILD': 'TRUE'}, shell=True)
subprocess.run('pip install causal-conv1d --no-build-isolation',
               env={**os.environ, 'CAUSAL_CONV1D_SKIP_CUDA_BUILD': 'TRUE'}, shell=True)

from mistral.cli.chat import load_model, generate_stream
# Download the Mamba Codestral weights and tokenizer into a local cache directory.
mistral_models_path = Path.home().joinpath('mistral_models', 'mamba-codestral-7B-v0.1')
mistral_models_path.mkdir(parents=True, exist_ok=True)

snapshot_download(repo_id="mistralai/mamba-codestral-7B-v0.1",
                  allow_patterns=["params.json", "consolidated.safetensors", "tokenizer.model.v3"],
                  local_dir=mistral_models_path)

MODEL_PATH = str(mistral_models_path)
@spaces.GPU()
def generate_response(message, history):
    # Load the model inside the GPU-decorated function so ZeroGPU can attach a GPU on demand.
    model = load_model(MODEL_PATH)

    # Flatten Gradio's [[user, assistant], ...] history pairs into the chat-message
    # format expected by the Mistral helpers, then append the new user message.
    history_mistral_format = [
        {"role": "user" if i % 2 == 0 else "assistant", "content": m}
        for i, m in enumerate(sum(history, []))
    ]
    history_mistral_format.append({"role": "user", "content": message})

    response = ""
    for chunk in generate_stream(model, history_mistral_format, max_tokens=256):
        response += chunk
    return response
# Gradio interface
def chat_interface(message, history):
    # The model is loaded inside generate_response, so no model argument is passed here.
    return generate_response(message, history)
iface = gr.ChatInterface(
    chat_interface,
    title="Mamba Codestral Chat (ZeroGPU)",
    description="Chat with the Mamba Codestral 7B model using the Hugging Face Spaces ZeroGPU feature.",
)
if __name__ == "__main__":
    iface.launch()