Spaces:

AstroMLab
/

AstroSage-8B

Running on Zero

App Files Files Community

AstroSage-8B / app.py

Tijmen2

Update app.py

abe401d verified 7 months ago

raw

history blame

2.44 kB

	import gradio as gr
	from llama_cpp import Llama
	from huggingface_hub import hf_hub_download
	import random

	# Resolve the Q8_0 GGUF file from the AstroMLab/AstroSage-8B-GGUF Hub repo.
	# The returned value is handed to Llama() below as its model_path.
	model_path = hf_hub_download(repo_id="AstroMLab/AstroSage-8B-GGUF", filename="AstroSage-8B-Q8_0.gguf")

	# Load the model once at import time; respond_stream() reuses this single
	# instance for every chat request.
	llm = Llama(
	    model_path=model_path,
	    n_ctx=2048,
	    n_threads=4,
	    chat_format="llama-3",
	    seed=42,
	    f16_kv=True,
	    logits_all=False,
	    use_mmap=True,
	    # Replaces the former `use_gpu=True`, which is not a Llama() option: it
	    # was swallowed by the constructor's **kwargs and had no effect, leaving
	    # n_gpu_layers at its default of 0 (CPU only). -1 asks for all layers to
	    # be offloaded when the library is built with GPU support.
	    n_gpu_layers=-1,
	)

	# Opening lines for the assistant. One is picked at random at startup and
	# shown as the first chatbot message, before the user has typed anything.
	GREETING_MESSAGES = [
	    "Greetings! I am AstroSage, your guide to the cosmos. What would you like to explore today?",
	    "Welcome to our cosmic journey! I am AstroSage. How may I assist you in understanding the universe?",
	    "AstroSage here. Ready to explore the mysteries of space and time. How may I be of assistance?",
	    "The universe awaits! I'm AstroSage. What astronomical wonders shall we discuss?",
	]

	def respond_stream(message, history):
	    """Stream AstroSage's reply to ``message`` as a growing string.

	    Args:
	        message: The new user message. A falsy value (``""`` or ``None``)
	            ends the generator immediately without yielding anything.
	        history: Earlier turns as ``(user, assistant)`` pairs. May be empty
	            or ``None``. Either side of a pair may be falsy (for example the
	            ``[None, greeting]`` row the chatbot is seeded with); a falsy
	            side is left out of the prompt.

	    Yields:
	        The reply accumulated so far, once per streamed chunk that carries
	        content, so each yielded value supersedes the previous one. If
	        generation raises, a single ``"Error during generation: ..."``
	        string is yielded instead of the exception propagating.
	    """
	    if not message:
	        return

	    system_message = "Assume the role of AstroSage, a helpful chatbot designed to answer user queries about astronomy, astrophysics, and cosmology."
	    messages = [{"role": "system", "content": system_message}]
	    for user, assistant in history or []:
	        # The greeting row has no user text; forwarding it as-is would send
	        # the model a user message whose content is None.
	        if user:
	            messages.append({"role": "user", "content": user})
	        if assistant:
	            messages.append({"role": "assistant", "content": assistant})
	    messages.append({"role": "user", "content": message})

	    try:
	        past_tokens = ""  # everything generated so far
	        for chunk in llm.create_chat_completion(
	            messages=messages,
	            max_tokens=512,
	            temperature=0.7,
	            top_p=0.9,
	            stream=True,
	        ):
	            delta = chunk["choices"][0]["delta"]
	            new_tokens = delta.get("content")
	            if new_tokens is None:
	                # Chunks without text (no "content" key, or a None value)
	                # contribute nothing to the reply.
	                continue
	            past_tokens += new_tokens
	            # Yield the full text so far, not just the new piece, so the
	            # caller can simply overwrite what it is displaying.
	            yield past_tokens
	    except Exception as e:
	        # Surface the failure in the chat window rather than crashing the
	        # request handler.
	        yield f"Error during generation: {e}"

	initial_message = random.choice(GREETING_MESSAGES)
	# Height is passed to the constructor: the chained `.style()` helper was
	# deprecated in Gradio 3.x and no longer exists in 4.x.
	chatbot = gr.Chatbot([[None, initial_message]], height=750)


	def _chat_turn(message, history):
	    """Add the user's message to the chat and stream the reply into it.

	    Yields ``(textbox_value, chat_rows)`` pairs: the textbox is cleared and
	    the last chat row is updated with the reply text produced so far.
	    """
	    shown = [list(row) for row in (history or [])]
	    if not message:
	        # Nothing to send; leave the conversation as it is.
	        yield "", shown
	        return

	    # Rows without user text (the seeded greeting) are display-only and are
	    # not forwarded to the model as conversation context.
	    context = [row for row in shown if row[0]]

	    shown.append([message, ""])
	    yield "", shown
	    for partial in respond_stream(message, context):
	        shown[-1][1] = partial
	        yield "", shown


	with gr.Blocks() as demo:
	    with gr.Row():
	        # Integer scales keep the original 0.8 : 0.2 width ratio; Gradio
	        # expects `scale` to be an integer.
	        with gr.Column(scale=4):
	            chatbot.render()
	            user_input = gr.Textbox(
	                show_label=False,
	                placeholder="Ask AstroSage a question and press Enter",
	            )

	        with gr.Column(scale=1):
	            clear = gr.Button("Clear")

	    # Without this wiring the page had no way to send a message and
	    # respond_stream was never called.
	    user_input.submit(_chat_turn, [user_input, chatbot], [user_input, chatbot])
	    clear.click(lambda: [], None, chatbot, queue=False)

	# The queue is required for generator handlers to stream partial output.
	demo.queue().launch()