File size: 2,093 Bytes
bbd5c76
06323bb
b4b7a21
 
06323bb
b4b7a21
06323bb
 
 
 
 
 
 
3421ed4
b4b7a21
3421ed4
 
 
b4b7a21
3421ed4
b4b7a21
06323bb
 
8735569
b4b7a21
06323bb
b4b7a21
 
 
 
 
 
bbd5c76
b4b7a21
bbd5c76
b4b7a21
 
b9fcfca
b4b7a21
b9fcfca
b4b7a21
 
 
 
 
 
 
 
 
 
 
b9fcfca
 
b4b7a21
b9fcfca
 
b4b7a21
b9fcfca
bbd5c76
b9fcfca
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import gradio as gr
import requests
from bs4 import BeautifulSoup, Comment
from llama_cpp import Llama

# Predicate deciding which parsed text nodes count as visible page text
def tag_visible(element):
    """Return True if a parsed text node should be kept as visible text.

    A node is rejected when its parent tag is one of style, script, head,
    title or meta, when its parent is '[document]' (the name BeautifulSoup
    gives the top-level document object), or when the node itself is an
    HTML comment.
    """
    hidden_parents = ('style', 'script', 'head', 'title', 'meta', '[document]')
    is_hidden = element.parent.name in hidden_parents or isinstance(element, Comment)
    return not is_hidden

def get_text_from_url(url):
    """Fetch a web page and return its visible text as a single string.

    Args:
        url: Address of the page to download.

    Returns:
        The visible text fragments of the page, each stripped of
        surrounding whitespace and joined with single spaces.
        Whitespace-only fragments are omitted.

    Raises:
        requests.RequestException: If the request fails, hits the
            10-second timeout, or the server answers with an HTTP error
            status.
    """
    response = requests.get(url, timeout=10)
    # Fail loudly on 4xx/5xx so an error page is never scraped as content.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')
    # `string=True` selects every text node; it replaces the deprecated
    # `text=` spelling of the same argument.
    texts = soup.find_all(string=True)
    stripped = (t.strip() for t in filter(tag_visible, texts))
    # Skip fragments that were only whitespace so they do not turn into
    # long runs of spaces in the joined result.
    return " ".join(fragment for fragment in stripped if fragment)

# Build the prompt context once at import time: download the homepage and
# its sub-pages and keep a truncated excerpt of each.
homepage_url = "https://sites.google.com/view/abhilashnandy/home/"
extensions = ["", "pmrf-profile-page"]
text_list = []

for ext in extensions:
    try:
        # Only the first 2000 characters of each page are kept.
        excerpt = get_text_from_url(homepage_url + ext)[:2000]
    except Exception as e:
        # A failed fetch is recorded in place of the page text.
        excerpt = f"Error fetching {homepage_url+ext}: {str(e)}"
    text_list.append(excerpt)

# Single string that is interpolated into every prompt.
CONTEXT = " ".join(text_list)

# Load the Mistral-7B-Instruct GGUF model file once at import time
# (n_ctx=4096 context size, 6 threads, verbose output off).
llm = Llama(
    model_path="mistral-7b-instruct-v0.1.Q4_K_M.gguf",
    n_ctx=4096,
    n_threads=6,
    verbose=False,
)

# Function to answer queries
def answer_query(query):
    """Answer a question about the homepage using the local language model.

    Args:
        query: The user's question as entered in the UI.

    Returns:
        The model's reply with surrounding whitespace removed, or a short
        hint asking for a question when ``query`` is empty, blank or None.
    """
    # Blank input would only make the model produce an unrelated reply,
    # so skip inference altogether.
    if not query or not query.strip():
        return "Please enter a question."

    prompt = (
        "You are an AI chatbot answering queries based on the homepage of Abhilash Nandy. "
        "Your responses should be concise (under 30 words) and directly relevant to the provided context.\n\n"
        f"Context: {CONTEXT}\n\nUser: {query}\nAI:"
    )

    # The stop sequences end generation before the model starts writing
    # another "User:" / "AI:" turn on its own.
    response = llm(prompt, max_tokens=50, stop=["\nUser:", "\nAI:"], echo=False)

    return response["choices"][0]["text"].strip()

# Gradio UI: a two-line question box in, plain-text answer out
_question_box = gr.Textbox(lines=2, placeholder="Ask a question about Abhilash Nandy...")

iface = gr.Interface(
    title="Homepage QA Chatbot",
    description="Ask me anything about Abhilash Nandy's homepage.",
    fn=answer_query,
    inputs=_question_box,
    outputs="text",
)

# Launch the app only when this file is executed as a script
if __name__ == '__main__':
    iface.launch()