"""Gradio chat front-end for the AstroSage-8B GGUF model served via llama.cpp."""

import random

import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Initialize model: download the GGUF weights file and load it with llama.cpp.
model_path = hf_hub_download(
    repo_id="AstroMLab/AstroSage-8B-GGUF",
    filename="AstroSage-8B-Q8_0.gguf",
)

# NOTE(review): `use_gpu` does not appear to be a documented llama-cpp-python
# constructor parameter (GPU offload is normally controlled by `n_gpu_layers`)
# -- confirm whether it has any effect. Left unchanged to preserve behavior.
llm = Llama(
    model_path=model_path,
    n_ctx=2048,
    n_threads=4,
    chat_format="llama-3",
    seed=42,
    f16_kv=True,
    logits_all=False,
    use_mmap=True,
    use_gpu=True,
)

# Placeholder responses for when context is empty
GREETING_MESSAGES = [
    "Greetings! I am AstroSage, your guide to the cosmos. What would you like to explore today?",
    "Welcome to our cosmic journey! I am AstroSage. How may I assist you in understanding the universe?",
    "AstroSage here. Ready to explore the mysteries of space and time. How may I be of assistance?",
    "The universe awaits! I'm AstroSage. What astronomical wonders shall we discuss?",
]


def get_random_greeting() -> str:
    """Return one of the canned greeting messages, chosen at random."""
    return random.choice(GREETING_MESSAGES)


def respond(message, history, system_message, max_tokens, temperature, top_p):
    """Stream the assistant's reply to ``message`` as a growing string.

    Args:
        message: The new user message to answer.
        history: Earlier ``(user_msg, assistant_msg)`` pairs, NOT including
            ``message`` itself. Empty or ``None`` entries are skipped.
        system_message: Content of the system prompt.
        max_tokens: Passed to ``create_chat_completion`` as ``max_tokens``.
        temperature: Passed to ``create_chat_completion`` as ``temperature``.
        top_p: Passed to ``create_chat_completion`` as ``top_p``.

    Yields:
        The accumulated response text each time new content arrives. If
        generation raises, a single ``"Error: ..."`` string is yielded instead.
    """
    messages = [{"role": "system", "content": system_message}]
    for user_msg, assistant_msg in history or []:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    try:
        # Stream response from LLM
        stream = llm.create_chat_completion(
            messages=messages,
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            stream=True,  # Enable streaming
        )
        response_content = ""
        for chunk in stream:
            # Streamed chunks may carry a delta without a "content" key
            # (e.g. a role-only opening chunk or the closing chunk), so the
            # key must not be indexed unconditionally.
            piece = chunk["choices"][0].get("delta", {}).get("content")
            if piece:
                response_content += piece
                yield response_content  # Stream each update back to the frontend
    except Exception as e:  # UI boundary: show the failure in the chat window
        yield f"Error: {e}"


def clear_context():
    """Return a fresh chat state: one greeting turn and an empty textbox value.

    The user side of the greeting turn is ``None`` so that only the
    assistant's greeting is shown; ``respond`` skips falsy user entries, so
    the greeting turn adds no user message to the prompt.
    """
    greeting_message = get_random_greeting()
    return [(None, greeting_message)], ""


def handle_submit(message, history, system_message, max_tokens, temperature, top_p):
    """Handle a textbox submission and stream the reply into the chat history.

    Yields ``(history, "")`` pairs: the updated chat history for the chatbot
    and an empty string that clears the input textbox.
    """
    history = list(history or [])
    # Snapshot the earlier turns BEFORE appending the new one: ``respond``
    # appends ``message`` itself, so passing the updated history would send
    # the user's message to the model twice.
    prior_turns = list(history)
    history.append((message, None))  # Append user's message first
    yield history, ""  # Show the user's message and clear the textbox right away

    # Stream the assistant's response and update the history
    for response in respond(
        message, prior_turns, system_message, max_tokens, temperature, top_p
    ):
        history[-1] = (message, response)
        yield history, ""  # Yield updated history to display in the chatbox


# Gradio Interface
with gr.Blocks() as demo:
    # NOTE(review): the header text contains no HTML tags; markup may have been
    # lost from this literal -- confirm against the intended page header.
    gr.HTML("""
AstroSage-LLAMA-3.1-8B
Astronomy-Specialized Chatbot
""")

    chatbot = gr.Chatbot(height=400)
    msg = gr.Textbox(
        placeholder="Ask about astronomy, astrophysics, or cosmology...",
        show_label=False,
    )

    with gr.Accordion("Advanced Settings", open=False) as advanced_settings:
        system_msg = gr.Textbox(
            value="You are AstroSage, a highly knowledgeable AI assistant specialized in astronomy, astrophysics, and cosmology. Provide accurate, engaging, and educational responses about space science and the universe.",
            label="System Message",
            lines=3,
        )
        max_tokens = gr.Slider(1, 2048, value=512, step=1, label="Max Tokens")
        temperature = gr.Slider(0.1, 4.0, value=0.7, step=0.1, label="Temperature")
        top_p = gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top-p")

    # Automatically handle submission on Enter key press with streaming.
    # ``handle_submit`` is a generator, so it is left on Gradio's queue
    # (the original ``queue=False`` opted this event out of it).
    msg.submit(
        handle_submit,
        inputs=[msg, chatbot, system_msg, max_tokens, temperature, top_p],
        outputs=[chatbot, msg],
    )

    # Automatically clear context on reload with a greeting
    demo.load(clear_context, None, [chatbot, msg])

if __name__ == "__main__":
    # Enable the queue explicitly so generator (streaming) handlers work.
    demo.queue()
    demo.launch()