import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
import random

# Fetch the quantized GGUF weights from the Hugging Face Hub (cached locally
# after the first download) and load them once at import time.
model_path = hf_hub_download(
    repo_id="AstroMLab/AstroSage-8B-GGUF",
    filename="AstroSage-8B-Q8_0.gguf",
)

llm = Llama(
    model_path=model_path,
    n_ctx=2048,
    n_threads=4,
    chat_format="llama-3",
    seed=42,
    f16_kv=True,
    logits_all=False,
    use_mmap=True,
    # GPU offload is requested through `n_gpu_layers`; -1 offloads every layer.
    # The former `use_gpu=True` is not a `Llama` parameter: it was swallowed by
    # `**kwargs` and silently ignored, so inference always ran on the CPU.
    n_gpu_layers=-1,
)

# Candidate opening greetings. One is picked at random when the module is
# loaded and shown as the assistant's first message in the chat window.
GREETING_MESSAGES = [
    "Greetings! I am AstroSage, your guide to the cosmos. What would you like to explore today?",
    "Welcome to our cosmic journey! I am AstroSage. How may I assist you in understanding the universe?",
    "AstroSage here. Ready to explore the mysteries of space and time. How may I be of assistance?",
    "The universe awaits! I'm AstroSage. What astronomical wonders shall we discuss?",
]

def respond_stream(message, history):
    """Stream AstroSage's reply to ``message``.

    Args:
        message: The user's newest message. A falsy value (``None`` or ``""``)
            ends the generator immediately without yielding anything.
        history: Earlier turns as ``(user, assistant)`` pairs. Either side of
            a pair may be ``None``/empty (the opening greeting has no user
            side); such sides are skipped. ``None`` is treated as no history.

    Yields:
        The full reply accumulated so far, once per streamed chunk that
        carries text, so a UI can re-render the growing answer. If generation
        fails, a single ``"Error during generation: ..."`` string is yielded
        instead of raising.
    """
    if not message:
        return

    system_message = "Assume the role of AstroSage, a helpful chatbot designed to answer user queries about astronomy, astrophysics, and cosmology."
    messages = [{"role": "system", "content": system_message}]
    for user, assistant in history or []:
        # Skip empty sides: a turn such as [None, greeting] must not be sent
        # to the model as a user message whose content is None.
        if user:
            messages.append({"role": "user", "content": user})
        if assistant:
            messages.append({"role": "assistant", "content": assistant})
    messages.append({"role": "user", "content": message})

    try:
        reply_so_far = ""
        for chunk in llm.create_chat_completion(
            messages=messages,
            max_tokens=512,
            temperature=0.7,
            top_p=0.9,
            stream=True,
        ):
            # Some stream chunks carry only a role or an empty delta.
            piece = chunk["choices"][0]["delta"].get("content")
            if piece:
                reply_so_far += piece
                yield reply_so_far
    except Exception as e:
        # Surface the failure in the chat window rather than crashing the UI.
        yield f"Error during generation: {e}"

def _queue_user_turn(message, history):
    """Append the submitted text as a pending turn and clear the input box.

    Blank submissions leave the history unchanged.
    """
    turns = list(history or [])
    if message and message.strip():
        turns.append([message, None])
    return "", turns


def _stream_assistant_turn(history):
    """Fill the pending turn's reply with the text streamed by respond_stream."""
    if not history:
        yield []
        return
    pending_user, pending_reply = history[-1]
    if pending_user is None or pending_reply is not None:
        # Nothing is waiting for an answer (e.g. a blank submission).
        yield history
        return
    earlier = list(history[:-1])
    for partial in respond_stream(pending_user, earlier):
        yield earlier + [[pending_user, partial]]


# Opening assistant message shown before the user has typed anything.
initial_message = random.choice(GREETING_MESSAGES)
# `height` is passed to the constructor; the chained `.style(...)` helper was
# removed from Gradio and raises AttributeError on current releases.
chatbot = gr.Chatbot([[None, initial_message]], height=750)

with gr.Blocks() as demo:
    with gr.Row():
        # Gradio expects integer scales; 4:1 keeps the original 0.8/0.2 split.
        with gr.Column(scale=4):
            chatbot.render()
            user_input = gr.Textbox(
                show_label=False,
                placeholder="Ask about astronomy, astrophysics, or cosmology...",
            )

        with gr.Column(scale=1):
            clear = gr.Button("Clear")

    # First record the user's turn (fast, unqueued), then stream the answer.
    user_input.submit(
        _queue_user_turn,
        [user_input, chatbot],
        [user_input, chatbot],
        queue=False,
    ).then(_stream_assistant_turn, chatbot, chatbot)

    clear.click(lambda: [], None, chatbot, queue=False)

# Queuing is required for generator (streaming) event handlers.
demo.queue().launch()