File size: 2,516 Bytes
be1aa47 c94cc88 11d7701 c94cc88 11d7701 c94cc88 be1aa47 c94cc88 be1aa47 11d7701 be1aa47 11d7701 ca35e53 fe25716 be1aa47 fe25716 be1aa47 ca35e53 96edac1 fe25716 96edac1 ca35e53 fe25716 96edac1 fe25716 11d7701 6a2645a be1aa47 fe25716 6a2645a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 |
import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
import random
# Initialize model
# Fetch the quantized AstroSage weights from the Hugging Face Hub; the call
# returns the local filesystem path that is handed to Llama below.
model_path = hf_hub_download(
    repo_id="AstroMLab/AstroSage-8B-GGUF",
    filename="AstroSage-8B-Q8_0.gguf",
)

# Load the model once at import time; respond_stream reuses this instance.
llm = Llama(
    model_path=model_path,
    n_ctx=2048,
    n_threads=4,
    chat_format="llama-3",
    seed=42,
    f16_kv=True,
    logits_all=False,
    use_mmap=True,
    # The previous `use_gpu=True` is not a Llama() option, so it never
    # requested GPU offload. llama-cpp-python controls offload through
    # `n_gpu_layers`; -1 asks for every layer to be offloaded.
    # NOTE(review): presumably a no-op on CPU-only builds -- confirm for the
    # target deployment.
    n_gpu_layers=-1,
)
# Pool of opening greetings. One entry is picked at random and shown as the
# first assistant message, before the user has sent anything.
GREETING_MESSAGES = [
"Greetings! I am AstroSage, your guide to the cosmos. What would you like to explore today?",
"Welcome to our cosmic journey! I am AstroSage. How may I assist you in understanding the universe?",
"AstroSage here. Ready to explore the mysteries of space and time. How may I be of assistance?",
"The universe awaits! I'm AstroSage. What astronomical wonders shall we discuss?",
]
def get_random_greeting():
    """Pick one of the predefined greetings at random and return it."""
    greeting = random.choice(GREETING_MESSAGES)
    return greeting
def respond_stream(message, history):
    """Stream the model's reply to ``message``, yielding the text so far.

    Args:
        message: The latest user input. A falsy value (empty string, None)
            ends the generator immediately without yielding anything.
        history: Earlier turns as ``(user, assistant)`` pairs. Either side of
            a pair may be empty or None -- for example the greeting row
            ``[None, greeting]`` that seeds the chatbot -- and such slots are
            skipped. ``None`` is treated as an empty history.

    Yields:
        The accumulated response text after every streamed chunk that carries
        content. If generation raises, a single
        ``"Error during generation: ..."`` string is yielded instead.
    """
    if not message:  # Handle empty messages
        return

    system_message = "You are AstroSage, a highly knowledgeable AI assistant..."  # ... (your system message)
    messages = [{"role": "system", "content": system_message}]

    # Rebuild the conversation in chat-completion format. Empty slots are
    # skipped so that no message with ``content=None`` is sent to the model
    # (the chatbot's initial greeting row has no user side).
    for user, assistant in history or []:
        if user:
            messages.append({"role": "user", "content": user})
        if assistant:
            messages.append({"role": "assistant", "content": assistant})
    messages.append({"role": "user", "content": message})

    try:
        response_content = ""
        for chunk in llm.create_chat_completion(
            messages=messages,
            max_tokens=512,
            temperature=0.7,
            top_p=0.9,
            stream=True,
        ):
            delta = chunk["choices"][0]["delta"]
            # Not every delta carries text; skip a missing or None "content"
            # rather than concatenating it.
            piece = delta.get("content")
            if piece is None:
                continue
            response_content += piece
            yield response_content  # yield inside the loop for streaming
    except Exception as e:
        # Boundary handler: surface the failure in the chat window instead of
        # letting the exception propagate to the caller.
        yield f"Error during generation: {e}"
# Display the welcome message as the first assistant message
initial_message = get_random_greeting()
# The greeting row has no user side, hence the leading None.
chatbot = gr.Chatbot(value=[[None, initial_message]])

with gr.Blocks() as demo:
    chatbot.render()
    clear = gr.Button("Clear")
    # Reset the chat window to an empty history. The handler is passed once,
    # by keyword: supplying it both positionally and as ``fn=`` raises
    # "TypeError: got multiple values for argument 'fn'".
    clear.click(fn=lambda: [], inputs=None, outputs=chatbot)
    # NOTE(review): nothing in this layout calls respond_stream -- there is no
    # input component or submit handler here. Confirm whether that wiring is
    # missing or lives elsewhere.

demo.queue().launch()