# AstroSage / app.py
# (Hugging Face file-page metadata: uploaded by Tijmen2, "Update app.py",
#  commit 0264b98 verified, raw / history / blame, 6.39 kB)
import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
import random
import spaces
import torch

# Get the number of available CPU cores (used below for llama.cpp threading).
import multiprocessing

n_cores = multiprocessing.cpu_count()
# Initialize model with optimized parameters.
# hf_hub_download caches the file locally, so this only downloads once.
model_path = hf_hub_download(
    repo_id="AstroMLab/AstroSage-8B-GGUF",
    filename="AstroSage-8B-Q8_0.gguf"
)

# Optimized LLaMA parameters for A100
llm = Llama(
    model_path=model_path,
    n_ctx=2048,             # Keep context window reasonable
    n_threads=n_cores,      # Use all available CPU cores
    n_batch=512,            # Increase batch size for faster processing
    n_gpu_layers=35,        # Offload more layers to GPU
    chat_format="llama-3",  # Prompt template used by create_chat_completion
    seed=42,                # Fixed seed for reproducible sampling
    f16_kv=True,            # Use FP16 for key/value cache
    logits_all=False,       # Only final-token logits are needed for generation
    use_mmap=False,         # Disable memory mapping for faster loading
    use_gpu=True,           # NOTE(review): not a documented Llama() kwarg in
                            # llama-cpp-python; likely swallowed by **kwargs — verify
    tensor_split=None,      # Let the model handle tensor splitting
)
# Optimize CUDA settings if available (affects torch ops, not llama.cpp itself).
if torch.cuda.is_available():
    torch.backends.cuda.matmul.allow_tf32 = True  # Allow TF32 for faster matrix multiplication
    torch.backends.cudnn.benchmark = True  # Enable cudnn autotuner
# Placeholder responses for when context is empty; initial_greeting() picks one
# at random to seed the chat on page load.
GREETING_MESSAGES = [
    "Greetings! I am AstroSage, your guide to the cosmos. What would you like to explore today?",
    "Welcome to our cosmic journey! I am AstroSage. How may I assist you in understanding the universe?",
    "AstroSage here. Ready to explore the mysteries of space and time. How may I be of assistance?",
    "The universe awaits! I'm AstroSage. What astronomical wonders shall we discuss?",
]
def user(user_message, history):
    """Append the user's message to the chat history.

    Returns ("", updated_history): the empty string clears the input
    textbox, and the new message list refreshes the chatbot component.
    The incoming history is not mutated.
    """
    updated = list(history) if history else []
    updated.append({"role": "user", "content": user_message})
    return "", updated
@spaces.GPU
def bot(history):
    """Generate and stream the assistant's reply into ``history``.

    Yields the full message list after each streamed token so Gradio can
    update the chatbot incrementally. Expects ``history`` to end with the
    user's latest message (as produced by ``user()``).
    """
    if not history:
        # No user message to answer; the code below would IndexError on
        # history[-1], so end the generator without yielding anything.
        return

    # Keep only the 5 most recent messages so the 2048-token context
    # window leaves room for the 512-token response.
    recent_history = history[-5:]

    # System prompt + trimmed prior turns + the current user message.
    messages = [
        {
            "role": "system",
            "content": "You are AstroSage, an intelligent AI assistant specializing in astronomy, astrophysics, and space science. Be concise and direct in your responses while maintaining accuracy."
        }
    ]
    for message in recent_history[:-1]:
        messages.append({"role": message["role"], "content": message["content"]})
    messages.append({"role": "user", "content": history[-1]["content"]})

    # Placeholder assistant entry that the streaming loop fills in.
    history.append({"role": "assistant", "content": ""})

    # Optimized streaming parameters.
    response = llm.create_chat_completion(
        messages=messages,
        max_tokens=512,
        temperature=0.7,
        top_p=0.95,
        stream=True,
        top_k=40,            # Add top-k sampling
        repeat_penalty=1.1,  # Slight penalty for repetition
        mirostat_mode=2,     # Enable Mirostat sampling
        mirostat_tau=5.0,
        mirostat_eta=0.1,
    )

    # Accumulate each content delta and re-yield the history so the UI
    # shows the reply growing token by token.
    for chunk in response:
        if chunk and "content" in chunk["choices"][0]["delta"]:
            history[-1]["content"] += chunk["choices"][0]["delta"]["content"]
            yield history
def initial_greeting():
    """Seed the chatbot with one randomly chosen welcome message."""
    greeting = random.choice(GREETING_MESSAGES)
    return [{"role": "assistant", "content": greeting}]
# Custom CSS for a space theme: dark navy chat container, darker page
# background in dark mode, and a capped content width.
custom_css = """
#component-0 {
background-color: #1a1a2e;
border-radius: 15px;
padding: 20px;
}
.dark {
background-color: #0f0f1a;
}
.contain {
max-width: 1200px !important;
}
"""
# Create the Gradio interface with optimized queue settings.
with gr.Blocks(css=custom_css, theme=gr.themes.Soft(primary_hue="indigo", neutral_hue="slate")) as demo:
    # Intro / capabilities banner shown above the chat.
    gr.Markdown(
        """
# 🌌 AstroSage: Your Cosmic AI Companion
Welcome to AstroSage, an advanced AI assistant specializing in astronomy, astrophysics, and cosmology.
Powered by the AstroSage-8B model, I'm here to help you explore the wonders of the universe!
### What Can I Help You With?
- πŸͺ Explanations of astronomical phenomena
- πŸš€ Space exploration and missions
- ⭐ Stars, galaxies, and cosmology
- 🌍 Planetary science and exoplanets
- πŸ“Š Astrophysics concepts and theories
- πŸ”­ Astronomical instruments and observations
Just type your question below and let's embark on a cosmic journey together!
"""
    )

    # Chat display in "messages" format (list of {"role", "content"} dicts),
    # matching what user()/bot()/initial_greeting() produce.
    chatbot = gr.Chatbot(
        label="Chat with AstroSage",
        bubble_full_width=False,
        show_label=True,
        height=450,
        type="messages"
    )

    with gr.Row():
        msg = gr.Textbox(
            label="Type your message here",
            placeholder="Ask me anything about space and astronomy...",
            scale=9
        )
        clear = gr.Button("Clear Chat", scale=1)

    # Example questions for quick start (clicking one fills the textbox).
    gr.Examples(
        examples=[
            "What is a black hole and how does it form?",
            "Can you explain the life cycle of a star?",
            "What are exoplanets and how do we detect them?",
            "Tell me about the James Webb Space Telescope.",
            "What is dark matter and why is it important?"
        ],
        inputs=msg,
        label="Example Questions"
    )

    # Set up the message chain with optimized queuing:
    # 1) user() appends the message and clears the textbox (unqueued, instant),
    # 2) bot() streams the model's reply into the chatbot (queued and batched).
    msg.submit(
        user,
        [msg, chatbot],
        [msg, chatbot],
        queue=False
    ).then(
        bot,
        chatbot,
        chatbot,
        queue=True,  # Enable queuing for bot responses
        batch=True,  # Enable batching
        max_batch_size=4  # Process up to 4 requests together
    )

    # Clear button functionality: reset the chatbot to empty.
    clear.click(lambda: None, None, chatbot, queue=False)

    # Initial greeting rendered when the page first loads.
    demo.load(initial_greeting, None, chatbot, queue=False)
# Launch the app with optimized settings.
if __name__ == "__main__":
    # NOTE(review): queue tuning left disabled; `concurrency_count` appears to
    # be from an older Gradio queue API — verify before re-enabling.
    #demo.queue(concurrency_count=2) # Allow 2 concurrent requests
    demo.launch()