import gradio as gr
from huggingface_hub import InferenceClient
import requests
from bs4 import BeautifulSoup
from bs4.element import Comment

def tag_visible(element):
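    """Return True for text nodes that belong to the page's visible content."""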
    if element.parent.name in ['style', 'script', 'head', 'title', 'meta', '[document]']:
        return False
    if isinstance(element, Comment):
        return False
    return True

def get_text_from_url(url):
    """Fetch a page and return its visible text, one text node per line."""
    response = requests.get(url, timeout=10)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')
    texts = soup.find_all(string=True)  # 'string' replaces bs4's deprecated 'text' argument
    visible_texts = filter(tag_visible, texts)
    return "\n".join(t.strip() for t in visible_texts)
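
# Optional sanity check (network-dependent; a rough sketch, not part of the app itself):
# print(get_text_from_url("https://sites.google.com/view/abhilashnandy/home/")[:200])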

# Pre-fetch and truncate homepage text to reduce prompt length
text_list = []
homepage_url = "https://sites.google.com/view/abhilashnandy/home/"
extensions = ["", "pmrf-profile-page"]
for ext in extensions:
    full_text = get_text_from_url(homepage_url + ext)
    truncated_text = full_text[:1000]  # using first 1000 characters to keep prompt short
    text_list.append(truncated_text)

SYSTEM_MESSAGE = (
    "You are a QA chatbot to answer queries (in less than 30 words) on my homepage. "
    "Context: " + " ".join(text_list)
)

# Switch to a model optimized for low-latency CPU inference.
# Here we use a GPT4All model (assuming one is available via the Inference API).
client = InferenceClient("nomic-ai/gpt4all-lora")
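# Note: whether this exact model is served by the Inference API is not guaranteed;
# any chat-capable model id (e.g. "HuggingFaceH4/zephyr-7b-beta") can be swapped in
# here if calls to it fail.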

def respond(message, history: list[tuple[str, str]], system_message=SYSTEM_MESSAGE,
            max_tokens=100, temperature=0.7, top_p=0.95):
    messages = [{"role": "system", "content": system_message}]
    for q, a in history:
        messages.append({"role": "user", "content": "Question: " + q})
        messages.append({"role": "assistant", "content": "Answer: " + a})
    messages.append({"role": "user", "content": message})
    try:
        # Use streaming mode to return tokens as they are generated
        response_stream = client.chat_completion(
            messages,
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            stream=True,
        )
        output = ""
        for chunk in response_stream:
            # Streamed chunks expose incremental text via `delta`, not `message`
            if chunk.choices:
                part = chunk.choices[0].delta.content or ""
                output += part
        return output.strip()
    except Exception as e:
        print(f"An error occurred: {e}")
        return str(e)

initial_message = [("user", "Yo who dis Abhilash?")]  # example opener (not currently wired into the interface)
markdown_note = "## Ask Anything About Me! (Might show a tad bit of hallucination!)"

demo = gr.Blocks()
with demo:
    gr.Markdown(markdown_note)
    gr.ChatInterface(
        fn=respond,
        # examples=["Yo who dis Abhilash?", "What is Abhilash's most recent publication?"],
        additional_inputs=[],
    )
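
# Rough usage sketch: install the dependencies (gradio, huggingface_hub, requests,
# beautifulsoup4) and run `python app.py`; Gradio prints a local URL
# (typically http://127.0.0.1:7860) to open in a browser.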

if __name__ == "__main__":
    demo.launch()