import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
import random
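# This script assumes the following packages are installed, e.g.:
#   pip install gradio llama-cpp-python huggingface_hub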
# Download the quantized model weights from the Hugging Face Hub
model_path = hf_hub_download(
    repo_id="AstroMLab/AstroSage-8B-GGUF",
    filename="AstroSage-8B-Q8_0.gguf"
)
# Initialize the model
llm = Llama(
    model_path=model_path,
    n_ctx=2048,             # context window length in tokens
    n_threads=4,            # CPU threads used for inference
    chat_format="llama-3",  # apply the Llama 3 chat template
    seed=42,
    f16_kv=True,            # keep the KV cache in float16
    logits_all=False,
    use_mmap=True,
    n_gpu_layers=-1         # offload all layers to the GPU
)
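# Note: n_gpu_layers only takes effect if llama-cpp-python was built with GPU
# support (for example, installed with CMAKE_ARGS="-DGGML_CUDA=on" on a CUDA
# machine); otherwise inference falls back to the CPU.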
# Greeting messages shown when a new chat starts
GREETING_MESSAGES = [
    "Greetings! I am AstroSage, your guide to the cosmos. What would you like to explore today?",
    "Welcome to our cosmic journey! I am AstroSage. How may I assist you in understanding the universe?",
    "AstroSage here. Ready to explore the mysteries of space and time. How may I be of assistance?",
    "The universe awaits! I'm AstroSage. What astronomical wonders shall we discuss?",
]
def get_random_greeting():
    return random.choice(GREETING_MESSAGES)
# Generate the chat response, streaming tokens as they arrive
def respond_stream(message, history):
    # Prepend the system prompt, then replay the prior conversation
    system_message = (
        "You are AstroSage, a highly knowledgeable AI assistant specialized in "
        "astronomy, astrophysics, and cosmology. Provide accurate, engaging, "
        "and educational responses about space science and the universe."
    )
    messages = [{"role": "system", "content": system_message}]
    # With type="messages", Gradio passes history as a list of
    # {"role": ..., "content": ...} dicts rather than (user, assistant) tuples
    for msg in history:
        if msg.get("content"):
            messages.append({"role": msg["role"], "content": msg["content"]})
    messages.append({"role": "user", "content": message})
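    # At this point `messages` follows the OpenAI-style chat schema, e.g.
    # (illustrative user question, not from the app itself):
    # [{"role": "system", "content": "You are AstroSage, ..."},
    #  {"role": "user", "content": "What is a pulsar?"}]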
    try:
        # Request a streamed completion from the model
        stream = llm.create_chat_completion(
            messages=messages,
            max_tokens=512,
            temperature=0.7,
            top_p=0.9,
            stream=True  # yield partial chunks instead of one full response
        )
        # Accumulate the chunks; Gradio expects each yield to be the full
        # response so far, not just the newest token
        response_content = ""
        for chunk in stream:
            delta = chunk["choices"][0]["delta"]
            # The first and last chunks may carry no "content" key
            if "content" in delta:
                response_content += delta["content"]
                yield response_content
    except Exception as e:
        yield f"Error: {e}"
# gr.ChatInterface wraps the generator in a ready-made chat UI; it has no
# set_welcome_message method, so seed the underlying Chatbot with a greeting
chatbot = gr.ChatInterface(
    fn=respond_stream,
    type="messages",
    chatbot=gr.Chatbot(
        value=[{"role": "assistant", "content": get_random_greeting()}],
        type="messages"
    )
)
if __name__ == "__main__":
    chatbot.launch()