Spestly committed on
Commit
9c405c2
·
verified ·
1 Parent(s): 50b71d4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -43
app.py CHANGED
@@ -20,56 +20,37 @@ MODELS = {
20
  @spaces.GPU
21
  def generate_response(model_id, conversation, user_message, max_length=512, temperature=0.7):
22
  """Generate response using ZeroGPU - all CUDA operations happen here"""
23
-
24
- # Load model and tokenizer inside the GPU function
25
  print(f"πŸš€ Loading {model_id}...")
26
  start_time = time.time()
27
-
28
  tokenizer = AutoTokenizer.from_pretrained(model_id)
29
  if tokenizer.pad_token is None:
30
  tokenizer.pad_token = tokenizer.eos_token
31
-
32
  model = AutoModelForCausalLM.from_pretrained(
33
  model_id,
34
  torch_dtype=torch.float16,
35
  device_map="auto",
36
  trust_remote_code=True
37
  )
38
-
39
  load_time = time.time() - start_time
40
  print(f"βœ… Model loaded in {load_time:.2f}s")
41
-
42
  # Build messages in proper chat format
43
  messages = []
44
-
45
- # Add system prompt first
46
  system_prompt = "You are Athena, a helpful, harmless, and honest AI assistant. You provide clear, accurate, and concise responses to user questions. You are knowledgeable across many domains and always aim to be respectful and helpful. You are finetuned by Aayan Mishra"
47
  messages.append({"role": "system", "content": system_prompt})
48
-
49
- # Add conversation history
50
  for user_msg, assistant_msg in conversation:
51
  if user_msg:
52
  messages.append({"role": "user", "content": user_msg})
53
  if assistant_msg:
54
  messages.append({"role": "assistant", "content": assistant_msg})
55
-
56
- # Add current user message
57
  messages.append({"role": "user", "content": user_message})
58
-
59
- # Apply chat template
60
  prompt = tokenizer.apply_chat_template(
61
  messages,
62
  tokenize=False,
63
  add_generation_prompt=True
64
  )
65
-
66
- # Tokenize and move to GPU
67
  inputs = tokenizer(prompt, return_tensors="pt")
68
-
69
- # Move inputs to the same device as the model
70
  device = next(model.parameters()).device
71
  inputs = {k: v.to(device) for k, v in inputs.items()}
72
-
73
  generation_start = time.time()
74
  with torch.no_grad():
75
  outputs = model.generate(
@@ -81,38 +62,26 @@ def generate_response(model_id, conversation, user_message, max_length=512, temp
81
  pad_token_id=tokenizer.eos_token_id,
82
  eos_token_id=tokenizer.eos_token_id
83
  )
84
-
85
  generation_time = time.time() - generation_start
86
-
87
- # Decode response
88
  response = tokenizer.decode(
89
  outputs[0][inputs['input_ids'].shape[-1]:],
90
  skip_special_tokens=True
91
  ).strip()
92
-
93
  return response, load_time, generation_time
94
 
95
  def respond(message, history, model_name, max_length, temperature):
96
  """Main function for ChatInterface - simplified signature"""
97
  if not message.strip():
98
  return "Please enter a message"
99
-
100
- # Get model ID
101
  model_id = MODELS.get(model_name, MODELS["Athena-R3X 8B"])
102
-
103
  try:
104
- # Generate response using ZeroGPU
105
  response, load_time, generation_time = generate_response(
106
  model_id, history, message, max_length, temperature
107
  )
108
-
109
- # ChatInterface expects a generator for streaming or just return the response
110
  return response
111
-
112
  except Exception as e:
113
  return f"Error: {str(e)}"
114
 
115
- # CSS for better styling
116
  css = """
117
  .message {
118
  padding: 10px;
@@ -123,12 +92,9 @@ css = """
123
 
124
  theme = gr.themes.Monochrome()
125
 
126
- # Create ChatInterface
127
  with gr.Blocks(title="Athena Playground Chat", css=css, theme=theme) as demo:
128
  gr.Markdown("# πŸš€ Athena Playground Chat")
129
  gr.Markdown("*Powered by HuggingFace ZeroGPU*")
130
-
131
- # Additional inputs for configuration
132
  with gr.Row():
133
  with gr.Column(scale=1):
134
  model_choice = gr.Dropdown(
@@ -149,8 +115,6 @@ with gr.Blocks(title="Athena Playground Chat", css=css, theme=theme) as demo:
149
  label="🎨 Creativity",
150
  info="Higher values = more creative responses"
151
  )
152
-
153
- # Create the ChatInterface
154
  chat_interface = gr.ChatInterface(
155
  fn=respond,
156
  additional_inputs=[model_choice, max_length, temperature],
@@ -158,18 +122,20 @@ with gr.Blocks(title="Athena Playground Chat", css=css, theme=theme) as demo:
158
  description="Ask Athena anything!",
159
  theme="soft",
160
  examples=[
161
- "Hello! How are you?",
162
- "What can you help me with?",
163
- "Tell me about artificial intelligence",
164
- "Write a short poem about space"
165
  ],
166
  cache_examples=False,
167
  chatbot=gr.Chatbot(
168
  height=500,
169
  placeholder="Start chatting with Athena...",
170
- show_share_button=False
171
- )
 
 
172
  )
173
 
174
  if __name__ == "__main__":
175
- demo.launch()
 
20
  @spaces.GPU
21
  def generate_response(model_id, conversation, user_message, max_length=512, temperature=0.7):
22
  """Generate response using ZeroGPU - all CUDA operations happen here"""
 
 
23
  print(f"πŸš€ Loading {model_id}...")
24
  start_time = time.time()
 
25
  tokenizer = AutoTokenizer.from_pretrained(model_id)
26
  if tokenizer.pad_token is None:
27
  tokenizer.pad_token = tokenizer.eos_token
 
28
  model = AutoModelForCausalLM.from_pretrained(
29
  model_id,
30
  torch_dtype=torch.float16,
31
  device_map="auto",
32
  trust_remote_code=True
33
  )
 
34
  load_time = time.time() - start_time
35
  print(f"βœ… Model loaded in {load_time:.2f}s")
 
36
  # Build messages in proper chat format
37
  messages = []
 
 
38
  system_prompt = "You are Athena, a helpful, harmless, and honest AI assistant. You provide clear, accurate, and concise responses to user questions. You are knowledgeable across many domains and always aim to be respectful and helpful. You are finetuned by Aayan Mishra"
39
  messages.append({"role": "system", "content": system_prompt})
 
 
40
  for user_msg, assistant_msg in conversation:
41
  if user_msg:
42
  messages.append({"role": "user", "content": user_msg})
43
  if assistant_msg:
44
  messages.append({"role": "assistant", "content": assistant_msg})
 
 
45
  messages.append({"role": "user", "content": user_message})
 
 
46
  prompt = tokenizer.apply_chat_template(
47
  messages,
48
  tokenize=False,
49
  add_generation_prompt=True
50
  )
 
 
51
  inputs = tokenizer(prompt, return_tensors="pt")
 
 
52
  device = next(model.parameters()).device
53
  inputs = {k: v.to(device) for k, v in inputs.items()}
 
54
  generation_start = time.time()
55
  with torch.no_grad():
56
  outputs = model.generate(
 
62
  pad_token_id=tokenizer.eos_token_id,
63
  eos_token_id=tokenizer.eos_token_id
64
  )
 
65
  generation_time = time.time() - generation_start
 
 
66
  response = tokenizer.decode(
67
  outputs[0][inputs['input_ids'].shape[-1]:],
68
  skip_special_tokens=True
69
  ).strip()
 
70
  return response, load_time, generation_time
71
 
72
def respond(message, history, model_name, max_length, temperature):
    """Chat callback used by gr.ChatInterface.

    Resolves the dropdown selection to a model id, delegates generation
    to ``generate_response``, and returns only the reply text.  Failures
    are converted into an error string so the chat UI never crashes.

    Args:
        message: The user's latest input text.
        history: Prior (user, assistant) turns supplied by the interface.
        model_name: Display name chosen in the model dropdown.
        max_length: Generation length budget, forwarded unchanged.
        temperature: Sampling temperature, forwarded unchanged.

    Returns:
        The assistant's reply, a prompt-for-input notice, or an
        ``Error: ...`` string describing the exception.
    """
    # Guard clause: whitespace-only input gets a gentle nudge instead of a GPU call.
    if message.strip() == "":
        return "Please enter a message"

    # Unknown dropdown values fall back to the default model.
    resolved_id = MODELS.get(model_name, MODELS["Athena-R3X 8B"])

    try:
        # Timings are returned by the generator but unused here.
        reply, _load_seconds, _gen_seconds = generate_response(
            resolved_id, history, message, max_length, temperature
        )
    except Exception as e:  # surface any failure to the chat window
        return f"Error: {str(e)}"
    return reply
84
 
 
85
  css = """
86
  .message {
87
  padding: 10px;
 
92
 
93
  theme = gr.themes.Monochrome()
94
 
 
95
  with gr.Blocks(title="Athena Playground Chat", css=css, theme=theme) as demo:
96
  gr.Markdown("# πŸš€ Athena Playground Chat")
97
  gr.Markdown("*Powered by HuggingFace ZeroGPU*")
 
 
98
  with gr.Row():
99
  with gr.Column(scale=1):
100
  model_choice = gr.Dropdown(
 
115
  label="🎨 Creativity",
116
  info="Higher values = more creative responses"
117
  )
 
 
118
  chat_interface = gr.ChatInterface(
119
  fn=respond,
120
  additional_inputs=[model_choice, max_length, temperature],
 
122
  description="Ask Athena anything!",
123
  theme="soft",
124
  examples=[
125
+ ["Hello! How are you?", "Athena-R3X 8B", 512, 0.7],
126
+ ["What can you help me with?", "Athena-R3X 8B", 512, 0.7],
127
+ ["Tell me about artificial intelligence", "Athena-R3X 8B", 512, 0.7],
128
+ ["Write a short poem about space", "Athena-R3X 8B", 512, 0.7]
129
  ],
130
  cache_examples=False,
131
  chatbot=gr.Chatbot(
132
  height=500,
133
  placeholder="Start chatting with Athena...",
134
+ show_share_button=False,
135
+ type="messages"
136
+ ),
137
+ type="messages"
138
  )
139
 
140
  if __name__ == "__main__":
141
+ demo.launch()