mjavaid commited on
Commit
b867d63
·
1 Parent(s): 73453c2
Files changed (2) hide show
  1. app.py +63 -0
  2. requirements.txt +6 -0
app.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import AutoTokenizer, AutoModelForCausalLM
3
+ import torch
4
+ import spaces
5
+
6
# Define model repository: fine-tuned Falcon3-Mamba hosted on the HF Hub.
repo_name = "hanzla/Falcon3-Mamba-R1-v0"

# Load tokenizer and model once at module import time so every request
# reuses the same weights (downloads from the Hub on first run).
tokenizer = AutoTokenizer.from_pretrained(repo_name)
model = AutoModelForCausalLM.from_pretrained(
    repo_name,
    device_map="auto",  # Auto place layers across available GPUs
    torch_dtype=torch.float16,  # half precision to halve GPU memory use
)
16
+
17
@spaces.GPU
def generate_response(message, history):
    """Generate one assistant reply for *message* given the chat *history*.

    Parameters
    ----------
    message : str
        The user's latest input.
    history : list
        Prior turns as supplied by Gradio. Both formats are accepted:
        (user, assistant) pairs (classic "tuples" mode) or dicts with
        "role"/"content" keys ("messages" mode).

    Returns
    -------
    str
        The decoded model reply with special tokens stripped.
    """
    messages = [
        {"role": "system", "content": "You are a helpful assistant. You think before answering"},
    ]

    # Add chat history, accepting either Gradio history format.
    for h in history:
        if isinstance(h, dict):
            # "messages" format: already {"role": ..., "content": ...}
            messages.append({"role": h["role"], "content": h["content"]})
        else:
            # "tuples" format: (user_text, assistant_text)
            messages.append({"role": "user", "content": h[0]})
            messages.append({"role": "assistant", "content": h[1]})

    # Add current message
    messages.append({"role": "user", "content": message})

    # Render the conversation with the model's chat template.
    input_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

    # Tokenize; keep the attention mask so generate() does not have to
    # infer padding from the pad token.
    encoded = tokenizer(input_text, return_tensors="pt").to(model.device)

    # Inference only: disable autograd to avoid holding activation memory.
    with torch.no_grad():
        outputs = model.generate(
            encoded.input_ids,
            attention_mask=encoded.attention_mask,
            max_new_tokens=1024,
            temperature=0.7,
            do_sample=True,
        )

    # Decode only the newly generated tokens (drop the echoed prompt).
    generated_tokens = outputs[0][encoded.input_ids.shape[1]:]
    response = tokenizer.decode(generated_tokens, skip_special_tokens=True)

    return response
50
+
51
# Create Gradio interface: ChatInterface wires the chat box, history
# state, and example prompts to generate_response.
demo = gr.ChatInterface(
    generate_response,
    title="Falcon3-Mamba-R1-v0 Chat",
    # Fixed typo: the description previously ended with a double period.
    description="Chat with the Falcon3-Mamba-R1-v0 model.",
    examples=[
        "Tell me about yourself",
        "Explain quantum computing like I'm 10",
        "Write a short poem about AI",
    ],
    theme="soft",
)

# Launch the interface (blocks and serves the app).
demo.launch()
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ gradio>=4.0.0
2
+ transformers>=4.34.0
3
+ torch>=2.0.0
4
+ accelerate
5
+ causal-conv1d>=1.4.0
6
+ mamba-ssm