import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# Load model and tokenizer with CPU optimizations
model = AutoModelForCausalLM.from_pretrained(
    "hackergeek/gemma-finetuned",
    torch_dtype=torch.float32,  # float32 for CPU compatibility
    device_map="cpu",  # force CPU usage
)
tokenizer = AutoTokenizer.from_pretrained("hackergeek/gemma-finetuned")
tokenizer.pad_token = tokenizer.eos_token

# Explicitly move model to CPU (redundant with device_map="cpu", but safe)
model.to("cpu")
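
# Optional CPU tuning (a sketch, not part of the original file): capping
# PyTorch's intra-op thread count can help on small shared Spaces hardware.
# The value below is an assumption; tune it to the actual vCPU allocation.
# torch.set_num_threads(2)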

def format_prompt(message, history):
    """Format the prompt with conversation history."""
    system_prompt = (
        "You are a knowledgeable space expert assistant. Answer questions about "
        "astronomy, space exploration, and related topics in a clear and engaging manner."
    )
    prompt = f"<system>{system_prompt}</system>\n"
    for user_msg, bot_msg in history:
        prompt += f"<user>{user_msg}</user>\n<assistant>{bot_msg}</assistant>\n"
    prompt += f"<user>{message}</user>\n<assistant>"
    return prompt
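
# Illustrative example (not from the original file) of the prompt produced for
# history=[("Hi", "Hello!")] and message="What is Mars?":
#   <system>You are a knowledgeable space expert assistant. ...</system>
#   <user>Hi</user>
#   <assistant>Hello!</assistant>
#   <user>What is Mars?</user>
#   <assistant>
# Note: these tags are this fine-tune's own convention, not Gemma's stock chat
# template (which uses <start_of_turn> markers), so they assume the model was
# trained on this format.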

def respond(message, history):
    # Format the prompt with conversation history
    full_prompt = format_prompt(message, history)

    # Tokenize input (keep on CPU)
    inputs = tokenizer(full_prompt, return_tensors="pt", add_special_tokens=False)

    # Generate response with CPU-friendly parameters
    outputs = model.generate(
        input_ids=inputs.input_ids,
        attention_mask=inputs.attention_mask,
        max_new_tokens=512,  # reduced for faster CPU processing
        temperature=0.7,
        top_p=0.85,
        repetition_penalty=1.1,
        do_sample=True,
        no_repeat_ngram_size=2,  # reduces verbatim repetition
        pad_token_id=tokenizer.eos_token_id,  # avoids the missing-pad-token warning
    )

    # Decode only the newly generated tokens (slice off the prompt)
    response = tokenizer.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
    return response
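
# Quick smoke test of the pipeline before wiring up the UI (illustrative,
# not part of the original app):
# print(respond("What is a neutron star?", []))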

# Simplified CSS (minimal dark space theme)
space_css = """
.gradio-container { background: #000000; color: #ffffff; }
.chatbot { background: #0a0a2a !important; }
"""

with gr.Blocks(css=space_css) as demo:
    gr.Markdown("# 🚀 CPU Space Chatbot 🚀")
gr.Markdown("Note: Responses may be slower due to CPU processing")
chatbot = gr.ChatInterface(
respond,
examples=[
"What is a neutron star?",
"Explain the Big Bang theory",
"How do rockets work?",
"What's the temperature on Venus?"
],
clear_btn="Clear",
)
chatbot.chatbot.height = 500

if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)
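
# Run locally with `python app.py`, then open http://localhost:7860; binding
# to 0.0.0.0 on port 7860 is what Hugging Face Spaces expects in its container.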