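"""Gradio QA chatbot for a personal homepage.

Scrapes the visible text of the homepage, prepends it to a system prompt,
and answers questions with a small chat model served through the Hugging
Face Inference API.
"""
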
import gradio as gr
from huggingface_hub import InferenceClient
import requests
from bs4 import BeautifulSoup
from bs4.element import Comment

def tag_visible(element):
    """Return True for text nodes that a browser would actually render."""
    if element.parent.name in ['style', 'script', 'head', 'title', 'meta', '[document]']:
        return False
    if isinstance(element, Comment):
        return False
    return True

def get_text_from_url(url):
    """Fetch a page and return only its visible text, one node per line."""
    response = requests.get(url, timeout=10)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')
    texts = soup.find_all(string=True)  # `text=True` is deprecated in bs4
    visible_texts = filter(tag_visible, texts)
    return "\n".join(t.strip() for t in visible_texts)

# Pre-fetch and truncate homepage text to reduce the prompt length.
text_list = []
homepage_url = "https://sites.google.com/view/abhilashnandy/home/"
extensions = ["", "pmrf-profile-page"]
for ext in extensions:
    try:
        full_text = get_text_from_url(homepage_url + ext)
    except requests.RequestException as e:
        # Don't crash the app at startup if a page is unreachable.
        print(f"Could not fetch {homepage_url + ext}: {e}")
        continue
    truncated_text = full_text[:1000]  # keep only the first 1000 characters per page
    text_list.append(truncated_text)

SYSTEM_MESSAGE = (
    "You are a QA chatbot to answer queries (in less than 30 words) on my homepage. "
    "Context: " + " ".join(text_list)
)
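# Two pages truncated to 1,000 characters each keep the system prompt short,
# which matters given a small chat model's limited context window.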

# Use a model that is both lightweight and includes a proper chat configuration.
client = InferenceClient("TheBloke/TinyLlama-1.1B-Chat-v1.0-GPTQ")

def respond(message, history: list[tuple[str, str]], system_message=SYSTEM_MESSAGE,
            max_tokens=100, temperature=0.7, top_p=0.95):
    messages = [{"role": "system", "content": system_message}]
    for q, a in history:
        messages.append({"role": "user", "content": "Question: " + q})
        messages.append({"role": "assistant", "content": "Answer: " + a})
    messages.append({"role": "user", "content": message})
    try:
        # Stream the completion and yield the accumulated text so the UI
        # updates as tokens arrive (gr.ChatInterface supports generators).
        response_stream = client.chat_completion(
            messages,
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            stream=True,
        )
        output = ""
        for chunk in response_stream:
            # Streamed chunks carry incremental text in `delta`, not `message`.
            if chunk.choices:
                output += chunk.choices[0].delta.content or ""
                yield output.strip()
    except Exception as e:
        print(f"An error occurred: {e}")
        yield str(e)

markdown_note = "## Ask Anything About Me! (Might show a tad bit of hallucination!)"

with gr.Blocks() as demo:
    gr.Markdown(markdown_note)
    gr.ChatInterface(
        fn=respond,
        examples=["Yo who dis Abhilash?", "What is Abhilash's most recent publication?"],
        additional_inputs=[],
    )

if __name__ == "__main__":
    demo.launch()
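
# Run locally with `python app.py`; on a Hugging Face Space, app.py is
# typically picked up as the entry point and launched automatically.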