hackergeek committed
Commit f086418 · verified · 1 Parent(s): 0a5d97f

Update app.py

Files changed (1): app.py (+29 -34)
app.py CHANGED
@@ -2,15 +2,18 @@ import gradio as gr
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
 
-# Load your fine-tuned model and tokenizer
+# Load model and tokenizer with CPU optimizations
 model = AutoModelForCausalLM.from_pretrained(
     "hackergeek/gemma-finetuned",
-    torch_dtype=torch.float16,
-    device_map="auto"
+    torch_dtype=torch.float32,  # Changed to float32 for CPU compatibility
+    device_map="cpu"  # Force CPU usage
 )
 tokenizer = AutoTokenizer.from_pretrained("hackergeek/gemma-finetuned")
 tokenizer.pad_token = tokenizer.eos_token
 
+# Explicitly move model to CPU (redundant but safe)
+model.to("cpu")
+
 def format_prompt(message, history):
     """Format the prompt with conversation history"""
     system_prompt = "You are a knowledgeable space expert assistant. Answer questions about astronomy, space exploration, and related topics in a clear and engaging manner."
@@ -26,55 +29,47 @@ def respond(message, history):
     # Format the prompt with conversation history
     full_prompt = format_prompt(message, history)
 
-    # Tokenize input
-    inputs = tokenizer(full_prompt, return_tensors="pt", add_special_tokens=False).to(model.device)
+    # Tokenize input (keep on CPU)
+    inputs = tokenizer(full_prompt, return_tensors="pt", add_special_tokens=False)
 
-    # Generate response
+    # Generate response with CPU-friendly parameters
     outputs = model.generate(
-        **inputs,
-        max_new_tokens=1024,
+        input_ids=inputs.input_ids,
+        attention_mask=inputs.attention_mask,
+        max_new_tokens=512,  # Reduced for faster CPU processing
         temperature=0.7,
-        top_p=0.9,
+        top_p=0.85,
         repetition_penalty=1.1,
-        do_sample=True
+        do_sample=True,
+        no_repeat_ngram_size=2  # Added to reduce repetition
     )
 
-    # Decode and extract only the new response
+    # Decode response
     response = tokenizer.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
 
     return response
 
-# Custom CSS for space theme
+# Simplified CSS for better CPU rendering
 space_css = """
-.gradio-container {
-    background: linear-gradient(45deg, #000000, #1a1a2e);
-    color: white;
-}
-.chatbot {
-    background-color: rgba(0, 0, 0, 0.7) !important;
-    border: 1px solid #4a4a4a !important;
-}
+.gradio-container { background: #000000; color: #ffffff; }
+.chatbot { background: #0a0a2a !important; }
 """
 
-# Create the interface
-with gr.Blocks(css=space_css, theme=gr.themes.Default(primary_hue="blue", secondary_hue="purple")) as demo:
-    gr.Markdown("# 🚀 Space Explorer Chatbot 🌌")
-    gr.Markdown("Ask me anything about space! Planets, stars, galaxies, or space exploration!")
+with gr.Blocks(css=space_css) as demo:
+    gr.Markdown("# 🚀 CPU Space Chatbot 🌌")
+    gr.Markdown("Note: Responses may be slower due to CPU processing")
 
     chatbot = gr.ChatInterface(
         respond,
         examples=[
-            "Explain black holes in simple terms",
-            "What's the latest news about Mars exploration?",
-            "How do stars form?",
-            "Tell me about the James Webb Space Telescope"
+            "What is a neutron star?",
+            "Explain the Big Bang theory",
+            "How do rockets work?",
+            "What's the temperature on Venus?"
         ],
-        retry_btn=None,
-        undo_btn=None,
-        clear_btn="Clear History",
+        clear_btn="Clear",
     )
-
-    chatbot.chatbot.height = 600
+    chatbot.chatbot.height = 500
 
 if __name__ == "__main__":
-    demo.launch(share=True)
+    demo.launch(server_name="0.0.0.0", server_port=7860)
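For reference, the committed inference path can be exercised outside the Gradio UI. The sketch below mirrors the settings introduced by this commit (float32 weights, CPU device map, the new sampling parameters); the single-turn prompt string and the __main__ harness are illustrative additions, not part of the commit:

# Standalone sketch of the committed CPU inference path.
# Assumptions: the hackergeek/gemma-finetuned checkpoint is accessible
# and fits in RAM; the prompt below is a hypothetical example.
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained(
    "hackergeek/gemma-finetuned",
    torch_dtype=torch.float32,  # half precision is poorly supported on CPU
    device_map="cpu",
)
tokenizer = AutoTokenizer.from_pretrained("hackergeek/gemma-finetuned")
tokenizer.pad_token = tokenizer.eos_token

if __name__ == "__main__":
    inputs = tokenizer("How do stars form?", return_tensors="pt")
    outputs = model.generate(
        input_ids=inputs.input_ids,
        attention_mask=inputs.attention_mask,
        max_new_tokens=512,
        temperature=0.7,
        top_p=0.85,
        repetition_penalty=1.1,
        do_sample=True,
        no_repeat_ngram_size=2,
    )
    # Slice off the prompt tokens so only the new completion is printed
    print(tokenizer.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True))

CPU generation is slow for a model of this class, so the reduction from max_new_tokens=1024 to 512 bounds worst-case latency rather than improving quality; no_repeat_ngram_size=2 and the lower top_p trade some diversity for fewer repetitive loops in long sampled outputs.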