import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
import random
# Download the quantized model from the Hugging Face Hub (cached after the first run)
model_path = hf_hub_download(
    repo_id="AstroMLab/AstroSage-8B-GGUF",
    filename="AstroSage-8B-Q8_0.gguf"
)
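
# Initialize llama.cpp with a 2,048-token context, 4 CPU threads, and the Llama-3 chat template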
llm = Llama(
    model_path=model_path,
    n_ctx=2048,
    n_threads=4,
    chat_format="llama-3",
    seed=42,
    f16_kv=True,
    logits_all=False,
    use_mmap=True,
    n_gpu_layers=-1  # llama-cpp-python has no use_gpu flag; -1 offloads all layers when built with GPU support
)
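# Note: at Q8_0, an 8B-parameter model is roughly 8.5 GB, so the first launch downloads a large file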

# Greeting messages shown whenever the chat context is (re)initialized
GREETING_MESSAGES = [
    "Greetings! I am AstroSage, your guide to the cosmos. What would you like to explore today?",
    "Welcome to our cosmic journey! I am AstroSage. How may I assist you in understanding the universe?",
    "AstroSage here. Ready to explore the mysteries of space and time. How may I be of assistance?",
    "The universe awaits! I'm AstroSage. What astronomical wonders shall we discuss?",
]

def get_random_greeting():
    return random.choice(GREETING_MESSAGES)

def respond(message, history, system_message, max_tokens, temperature, top_p):
    """Build the chat transcript and stream the model's reply."""
    messages = [{"role": "system", "content": system_message}]
    # Replay prior turns; skip empty slots such as the greeting's missing user message
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})
    try:
        # Stream response from LLM
        stream = llm.create_chat_completion(
            messages=messages,
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            stream=True  # Enable token-by-token streaming
        )
response_content = ""
for chunk in stream:
response_content += chunk["choices"][0]["delta"]["content"]
yield response_content # Stream each chunk back to the frontend
except Exception as e:
yield f"Error: {e}"

def clear_context():
    greeting_message = get_random_greeting()
    # Use None for the user slot so the greeting renders without an empty user bubble
    return [(None, greeting_message)], ""

# Gradio Interface
with gr.Blocks() as demo:
    gr.HTML("<div class='header-text'>AstroSage-LLAMA-3.1-8B</div><div class='subheader'>Astronomy-Specialized Chatbot</div>")
    chatbot = gr.Chatbot(height=400)
    msg = gr.Textbox(placeholder="Ask about astronomy, astrophysics, or cosmology...", show_label=False)
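    # gr.Chatbot history is a list of (user, assistant) tuples; a None entry renders no bubble for that side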
    with gr.Accordion("Advanced Settings", open=False) as advanced_settings:
        system_msg = gr.Textbox(
            value="You are AstroSage, a highly knowledgeable AI assistant specialized in astronomy, astrophysics, and cosmology. Provide accurate, engaging, and educational responses about space science and the universe.",
            label="System Message",
            lines=3
        )
        max_tokens = gr.Slider(1, 2048, value=512, step=1, label="Max Tokens")
        temperature = gr.Slider(0.1, 4.0, value=0.7, step=0.1, label="Temperature")
        top_p = gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top-p")
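        # max_tokens, temperature, and top_p are forwarded directly to create_chat_completion()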

    # Handle submission on Enter: show the user's message immediately, then stream the reply
    def handle_submit(message, history, system_message, max_tokens, temperature, top_p):
        history = history + [(message, None)]  # Append the user's message with a pending reply
        yield history, ""  # Display it right away and clear the textbox
        # Pass history without the pending pair so respond() does not duplicate the new message
        for response in respond(message, history[:-1], system_message, max_tokens, temperature, top_p):
            history[-1] = (message, response)  # Update the last turn with the streamed content
            yield history, ""
    msg.submit(
        handle_submit,
        inputs=[msg, chatbot, system_msg, max_tokens, temperature, top_p],
        outputs=[chatbot, msg]
        # Generator handlers need the queue to stream partial outputs, so queue=False is not set here
    )

    # Automatically clear context on reload with a greeting
    demo.load(clear_context, None, [chatbot, msg])

if __name__ == "__main__":
    demo.launch()