Kkordik committed (verified)
Commit 9afd959 · 1 Parent(s): 36176a4

Update app.py

Files changed (1):
  app.py +35 -17
app.py CHANGED
@@ -2,42 +2,60 @@ import gradio as gr
 from huggingface_hub import snapshot_download
 from pathlib import Path
 import spaces
-from mistral.cli.chat import load_model, generate_stream
+import subprocess
+import os
 
-subprocess.run('pip install mamba-ssm --no-build-isolation', env={'MAMBA_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
+# Install required packages
+subprocess.run('pip install mistral_inference mamba-ssm --no-build-isolation', env={'MAMBA_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
 subprocess.run('pip install causal-conv1d --no-build-isolation', env={'CAUSAL_CONV1D_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
 
+# Import after installation
+from mistral_inference.transformer import Transformer
+from mistral_inference.generate import generate
+from mistral_common.tokens.tokenizers.mistral import MistralTokenizer
+from mistral_common.protocol.instruct.messages import UserMessage, AssistantMessage
+from mistral_common.protocol.instruct.request import ChatCompletionRequest
 
+# Download the model
 mistral_models_path = Path.home().joinpath('mistral_models', 'mamba-codestral-7B-v0.1')
 mistral_models_path.mkdir(parents=True, exist_ok=True)
 
 snapshot_download(repo_id="mistralai/mamba-codestral-7B-v0.1",
                   allow_patterns=["params.json", "consolidated.safetensors", "tokenizer.model.v3"],
                   local_dir=mistral_models_path)
+
 MODEL_PATH = str(mistral_models_path)
 
+# Load model and tokenizer
+tokenizer = MistralTokenizer.from_file(os.path.join(MODEL_PATH, "tokenizer.model.v3"))
+model = Transformer.from_folder(MODEL_PATH)
 
 @spaces.GPU()
 def generate_response(message, history):
-    model = load_model(MODEL_PATH)
-    history_mistral_format = [
-        {"role": "user" if i % 2 == 0 else "assistant", "content": m}
-        for i, m in enumerate(sum(history, []))
-    ]
-    history_mistral_format.append({"role": "user", "content": message})
+    # Convert history to the format expected by the model
+    messages = []
+    for human, assistant in history:
+        messages.append(UserMessage(content=human))
+        messages.append(AssistantMessage(content=assistant))
+    messages.append(UserMessage(content=message))
+
+    # Create chat completion request
+    completion_request = ChatCompletionRequest(messages=messages)
+
+    # Tokenize input
+    tokens = tokenizer.encode_chat_completion(completion_request).tokens
 
-    response = ""
-    for chunk in generate_stream(model, history_mistral_format, max_tokens=256):
-        response += chunk
-    return response
+    # Generate response
+    out_tokens, _ = generate([tokens], model, max_tokens=256, temperature=0.7, eos_id=tokenizer.instruct_tokenizer.tokenizer.eos_id)
+
+    # Decode response
+    result = tokenizer.instruct_tokenizer.tokenizer.decode(out_tokens[0])
+
+    return result
 
 # Gradio interface
-def chat_interface(message, history):
-    response = generate_response(message, history, model)
-    return response
-
 iface = gr.ChatInterface(
-    chat_interface,
+    generate_response,
     title="Mamba Codestral Chat (ZeroGPU)",
     description="Chat with the Mamba Codestral 7B model using Hugging Face Spaces ZeroGPU feature.",
 )
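
For reference, the updated generate_response iterates history as (user, assistant) pairs, which matches Gradio's default tuple-style chat history. A minimal local smoke test might look like the sketch below; it is hypothetical usage, and it assumes app.py imports cleanly, i.e. that the pip installs, model download, and Transformer.from_folder load above all complete.

# Hypothetical smoke test for the updated generate_response in app.py.
# Assumes the module-level model/tokenizer setup in app.py succeeds and
# that history is a list of (user, assistant) pairs, as the loop
# `for human, assistant in history:` expects.
from app import generate_response

history = [
    ("Write a Python function that reverses a string.",
     "def reverse(s):\n    return s[::-1]"),
]
print(generate_response("Now add type hints to it.", history))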