"""Gradio QA chatbot that answers questions about a personal homepage.

The visible text of a few homepage pages is fetched once at start-up,
truncated, and embedded in the system prompt that accompanies every query
sent to a Hugging Face hosted model.
"""

import logging

import gradio as gr
import requests
from bs4 import BeautifulSoup
from bs4.element import Comment
from huggingface_hub import InferenceClient

logger = logging.getLogger(__name__)

# Text nodes directly under these parents are never shown to a reader.
_HIDDEN_PARENT_TAGS = frozenset(
    {"style", "script", "head", "title", "meta", "[document]"}
)
# Without a timeout, requests waits forever on an unresponsive server.
_REQUEST_TIMEOUT_SECONDS = 10
# Only this many characters of each page go into the prompt, to keep it short.
_MAX_CHARS_PER_PAGE = 1000
# Cap on generated tokens so that answers stay brief.
_MAX_NEW_TOKENS = 60


def tag_visible(element):
    """Return True when the text node *element* would be visible on the page.

    HTML comments and text whose direct parent is a non-rendered tag
    (style, script, head, title, meta, or the document root) are rejected.
    """
    if isinstance(element, Comment):
        return False
    parent = element.parent
    # A detached node has no parent; nothing marks it as hidden in that case.
    if parent is not None and parent.name in _HIDDEN_PARENT_TAGS:
        return False
    return True


def get_text_from_url(url, timeout=_REQUEST_TIMEOUT_SECONDS):
    """Download *url* and return its visible text, one fragment per line.

    Args:
        url: Address of the HTML page to fetch.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The stripped, non-empty visible text fragments joined by newlines.

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly instead of feeding an error page's text into the prompt.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")
    # `string=True` is the current name of the legacy `text=True` argument.
    fragments = (
        node.strip() for node in soup.find_all(string=True) if tag_visible(node)
    )
    # Whitespace-only nodes would otherwise waste the per-page character budget.
    return "\n".join(fragment for fragment in fragments if fragment)


def _fetch_context(base_url, page_suffixes, max_chars=_MAX_CHARS_PER_PAGE):
    """Return the truncated visible text of each page that could be fetched."""
    snippets = []
    for suffix in page_suffixes:
        page_url = base_url + suffix
        try:
            page_text = get_text_from_url(page_url)
        except requests.RequestException:
            # One unreachable page should not prevent the app from starting.
            logger.warning(
                "Could not fetch %s; continuing without it", page_url, exc_info=True
            )
            continue
        snippets.append(page_text[:max_chars])
    return snippets


# Pre-fetch and truncate homepage text to reduce the prompt length.
homepage_url = "https://sites.google.com/view/abhilashnandy/home/"
extensions = ["", "pmrf-profile-page"]
text_list = _fetch_context(homepage_url, extensions)

SYSTEM_MESSAGE = (
    "You are a QA chatbot to answer queries (in less than 30 words) on my homepage. "
    "Context: " + " ".join(text_list)
)

# Create a Hugging Face Inference client using a CPU-friendly model.
# 'google/flan-t5-base' is used as an example; adjust the model if needed.
client = InferenceClient(model="google/flan-t5-base")


def answer_query(query):
    """Answer *query* about the homepage using the pre-fetched context.

    Args:
        query: The user's question as entered in the text box.

    Returns:
        The model's answer with surrounding whitespace removed, or a short
        hint when no question was entered.
    """
    # Avoid a remote inference call when there is nothing to answer.
    if not query or not query.strip():
        return "Please enter a question."

    # Compose a prompt from the system message, the user query, and a
    # reminder to keep the answer short.
    prompt = SYSTEM_MESSAGE + "\nUser: " + query + "\nAnswer in less than 30 words:"
    # Limit new tokens to ensure brevity.
    result = client.text_generation(prompt, max_new_tokens=_MAX_NEW_TOKENS)

    # Handle both list and direct string responses from the inference client.
    if isinstance(result, list):
        answer = result[0].get("generated_text", "") if result else ""
    else:
        answer = result
    return answer.strip()


iface = gr.Interface(
    fn=answer_query,
    inputs=gr.Textbox(lines=2, placeholder="Enter your question here..."),
    outputs="text",
    title="Homepage QA Chatbot",
    description="A chatbot answering queries about the homepage using pre-fetched context.",
)

if __name__ == '__main__':
    iface.launch()