"""Gradio chat UI for a fine-tuned Gemma model, configured to run on CPU.

Loading this module loads the model and tokenizer and builds the ``demo``
Blocks app; running it as a script launches the web server.
"""

import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

MODEL_ID = "hackergeek/gemma-finetuned"

SYSTEM_PROMPT = "You are a knowledgeable space expert assistant. Answer questions about astronomy, space exploration, and related topics in a clear and engaging manner."

# Load model and tokenizer with CPU-oriented settings.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.float32,  # float32 chosen for CPU compatibility
    device_map="cpu",  # force CPU usage
)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token

# Explicitly move model to CPU (redundant with device_map, but harmless).
model.to("cpu")


def _history_turns(history):
    """Yield every non-empty turn of *history* in chronological order.

    Two history layouts are accepted:

    * a sequence of ``(user_message, bot_message)`` pairs, and
    * a sequence of ``{"role": ..., "content": ...}`` dicts.

    Turns that are ``None`` are skipped so the literal text ``None`` never
    ends up in the prompt.
    """
    for entry in history or ():
        if isinstance(entry, dict):
            turns = (entry.get("content"),)
        else:
            user_msg, bot_msg = entry
            turns = (user_msg, bot_msg)
        for turn in turns:
            if turn is not None:
                yield turn


def format_prompt(message, history) -> str:
    """Build the model prompt from the system prompt, history, and message.

    Args:
        message: The newest user message.
        history: Earlier conversation turns, either as ``(user, bot)`` pairs
            or as role/content dicts; may be empty or ``None``.

    Returns:
        The system prompt, every prior turn, and ``message``, each on its own
        line and each terminated by a newline.
    """
    lines = [SYSTEM_PROMPT, *_history_turns(history), message]
    return "".join(f"{line}\n" for line in lines)


def respond(message, history) -> str:
    """Generate the assistant's reply to *message* given the chat *history*.

    Args:
        message: The newest user message.
        history: Earlier conversation turns as supplied by the chat UI.

    Returns:
        The decoded text of the newly generated tokens only (the prompt is
        not echoed back).
    """
    full_prompt = format_prompt(message, history)

    # Tokenize the prompt; tensors stay on CPU.
    # NOTE(review): add_special_tokens=False asks the tokenizer not to add
    # special tokens (e.g. BOS) — confirm this matches how the model was
    # fine-tuned.
    inputs = tokenizer(full_prompt, return_tensors="pt", add_special_tokens=False)
    prompt_length = inputs.input_ids.shape[1]

    outputs = model.generate(
        input_ids=inputs.input_ids,
        attention_mask=inputs.attention_mask,
        max_new_tokens=512,  # kept modest for faster CPU processing
        temperature=0.7,
        top_p=0.85,
        repetition_penalty=1.1,
        do_sample=True,
        no_repeat_ngram_size=2,  # reduces repetition
    )

    # The output sequence starts with the prompt tokens; decode only what
    # follows them.
    generated_tokens = outputs[0][prompt_length:]
    return tokenizer.decode(generated_tokens, skip_special_tokens=True)


# Simplified CSS for the dark "space" theme.
space_css = """
.gradio-container { background: #000000; color: #ffffff; }
.chatbot { background: #0a0a2a !important; }
"""

with gr.Blocks(css=space_css) as demo:
    gr.Markdown("# 🚀 CPU Space Chatbot 🌌")
    gr.Markdown("Note: Responses may be slower due to CPU processing")

    chatbot = gr.ChatInterface(
        respond,
        examples=[
            "What is a neutron star?",
            "Explain the Big Bang theory",
            "How do rockets work?",
            "What's the temperature on Venus?",
        ],
        clear_btn="Clear",
    )
    chatbot.chatbot.height = 500

if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)