import gradio as gr
from huggingface_hub import InferenceClient
import requests
from bs4 import BeautifulSoup
from bs4.element import Comment

def tag_visible(element):
    """Return True when *element* is text a reader would see on the page.

    A node is rejected when its parent is one of the non-rendered containers
    listed below, or when the node itself is an HTML comment.
    """
    hidden_parents = ('style', 'script', 'head', 'title', 'meta', '[document]')
    # `or` short-circuits, so the comment check only runs for nodes whose
    # parent is not already a hidden container.
    return not (
        element.parent.name in hidden_parents
        or isinstance(element, Comment)
    )

def get_text_from_url(url, timeout=10):
    """Download *url* and return its visible text, one fragment per line.

    Args:
        url: Address of the HTML page to fetch.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``.

    Returns:
        The stripped, non-empty visible text fragments of the page joined
        with newlines. Visibility is decided by ``tag_visible``.

    Raises:
        requests.RequestException: Propagated from ``requests.get`` (for
            example on connection failure or when *timeout* elapses).
    """
    # requests applies no timeout unless one is given, so an unresponsive
    # server would otherwise block this call indefinitely.
    response = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(response.text, 'html.parser')
    # `string=True` is the current name of the deprecated `text=True` filter.
    fragments = (
        node.strip() for node in soup.find_all(string=True) if tag_visible(node)
    )
    # Whitespace-only nodes strip down to "", which would only add blank
    # lines to the result; drop them so callers that truncate the text keep
    # more real content.
    return "\n".join(fragment for fragment in fragments if fragment)

# Build the chatbot context once, at import time: fetch each page and keep
# only a short prefix of its text so the system prompt stays small.
homepage_url = "https://sites.google.com/view/abhilashnandy/home/"
extensions = ["", "pmrf-profile-page"]
# First 1000 characters of each page's text, in the order of `extensions`.
text_list = [get_text_from_url(homepage_url + ext)[:1000] for ext in extensions]

# System prompt: the instruction followed by the fetched page text.
SYSTEM_MESSAGE = (
    "You are a QA chatbot to answer queries (in less than 30 words) on my homepage. "
    "Context: " + " ".join(text_list)
)

# Use the GPTQ version that includes the tokenizer configuration
# Module-level inference client bound to the model id below; `respond`
# calls `client.chat_completion` on it for each chat turn.
client = InferenceClient("TheBloke/TinyLlama-1.1B-Chat-v1.0-GPTQ")

def respond(message, history: list[tuple[str, str]], system_message=SYSTEM_MESSAGE,
            max_tokens=100, temperature=0.7, top_p=0.95):
    """Answer *message* using the chat model, given the prior conversation.

    Args:
        message: The user's newest message.
        history: Earlier turns as ``(user_text, assistant_text)`` pairs.
            A side that is ``None`` is skipped.
        system_message: System prompt placed first in the conversation.
        max_tokens: Forwarded to ``client.chat_completion``.
        temperature: Forwarded to ``client.chat_completion``.
        top_p: Forwarded to ``client.chat_completion``.

    Returns:
        The model's reply with surrounding whitespace removed. If the
        request or the stream raises, the exception is printed and its
        text is returned instead.
    """
    messages = [{"role": "system", "content": system_message}]
    for q, a in history or []:
        # A turn can be missing one side; concatenating None onto the
        # prefix would raise TypeError before the request is even made.
        if q is not None:
            messages.append({"role": "user", "content": "Question: " + q})
        if a is not None:
            messages.append({"role": "assistant", "content": "Answer: " + a})
    messages.append({"role": "user", "content": message})
    try:
        # Enable streaming mode to start receiving output faster.
        response_stream = client.chat_completion(
            messages,
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            stream=True,
        )
        pieces = []
        for chunk in response_stream:
            choices = getattr(chunk, "choices", None)
            if not choices:
                continue
            # Streamed chunks carry their incremental text in
            # `delta.content`; `message` exists only on non-streamed
            # responses. The content may be None (e.g. on a final chunk).
            delta = getattr(choices[0], "delta", None)
            text = getattr(delta, "content", None)
            if text:
                pieces.append(text)
        return "".join(pieces).strip()
    except Exception as e:
        # UI boundary: surface the failure as the reply rather than crash.
        print(f"An error occurred: {e}")
        return str(e)

# Sample opening entry; nothing in the visible part of this file reads it.
initial_message = [("user", "Yo who dis Abhilash?")]
# Markdown heading text; passed to gr.Markdown when the UI is built.
markdown_note = "## Ask Anything About Me! (Might show a tad bit of hallucination!)"

# Assemble the UI: a Markdown heading followed by the chat widget.
with gr.Blocks() as demo:
    gr.Markdown(markdown_note)
    gr.ChatInterface(
        fn=respond,
        # Example prompts are currently disabled:
        # examples=["Yo who dis Abhilash?", "What is Abhilash's most recent publication?"],
        # No extra input components yet; add Gradio components to this list if needed.
        additional_inputs=[],
    )

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()