WebBot / app.py
Koomemartin's picture
Update app.py
daf469b verified
raw
history blame
3.69 kB
import streamlit as st
from bs4 import BeautifulSoup
import requests
from groq import Groq
# Define the Website class added
# Parse webpages that rely heavily on JavaScript for rendering.
# Download the Chrome driver matching your version of Chrome from https://developer.chrome.com/docs/chromedriver/downloads
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
import os
# Both paths are resolved against the directory the app was launched from.
_LAUNCH_DIR = os.getcwd()
# Location of the ChromeDriver executable handed to Selenium's Service.
PATH_TO_CHROME_DRIVER = os.path.join(_LAUNCH_DIR, 'chromedriver.exe')
# Destination passed to Selenium's Service as its log output.
log_file = os.path.join(_LAUNCH_DIR, 'logs')
class Website:
    """Snapshot of a web page rendered by a Selenium-driven Chrome browser.

    Attributes:
        url: The address that was requested.
        title: Text of the page's ``<title>`` element, or
            ``"No title found"`` when the page has none.
        text: Visible text of the page with script, style, img and input
            elements removed, one fragment per line.
    """

    url: str
    title: str
    text: str

    def __init__(self, url):
        """Load *url* in Chrome and extract its title and text.

        Args:
            url: Address of the page to load.

        Raises:
            Exception: Whatever Selenium raises when the driver cannot be
                started or the page cannot be loaded; nothing is caught here.
        """
        self.url = url

        options = Options()
        options.add_argument("--no-sandbox")
        options.add_argument("--disable-dev-shm-usage")
        service = Service(executable_path=PATH_TO_CHROME_DRIVER, log_output=log_file)

        # The options must be handed to the driver explicitly; building them
        # without passing them leaves the flags above without any effect.
        driver = webdriver.Chrome(service=service, options=options)
        try:
            driver.get(url)
            # NOTE: pages that require manual verification in the browser
            # are not handled; the source is captured as soon as get() returns.
            page_source = driver.page_source
        finally:
            # Always shut the browser down, even when loading the page fails,
            # so no Chrome/ChromeDriver process is left behind.
            driver.quit()

        soup = BeautifulSoup(page_source, 'html.parser')

        # ``.string`` is None when <title> is empty or contains nested tags,
        # so fall back to the placeholder in that case as well.
        title_text = soup.title.string if soup.title else None
        self.title = title_text or "No title found"

        # Drop elements that contribute no readable text.
        for irrelevant in soup(["script", "style", "img", "input"]):
            irrelevant.decompose()
        self.text = soup.get_text(separator="\n", strip=True)
# Initialize Groq client.
# SECURITY: the API key must never be committed to source control. It is read
# from the GROQ_API_KEY environment variable instead; any key that was
# previously hard-coded here should be treated as leaked and revoked.
api_key = os.environ.get("GROQ_API_KEY")
if not api_key:
    # Without a key every request would fail later with a less obvious error,
    # so stop this script run immediately with a clear message.
    st.error("GROQ_API_KEY is not set. Configure it as an environment variable or secret and reload the app.")
    st.stop()
client = Groq(api_key=api_key)
# Streamlit UI: collect a URL and a question, scrape the page, then stream an
# answer from the Groq chat completion API and display it.
st.title("Welcome to WebBot")
st.write("Enter a website URL and ask questions about its content!")

# Input fields
url = st.text_input("Website URL:", "https://example.com or .ac.ke " )
user_query = st.text_area("What would you like to know about this website")

if st.button("Submit"):
    website = None
    load_error = None

    # The text box is pre-filled with a hint rather than a real address, so
    # reject anything that is not a single http(s) URL before starting Chrome.
    target_url = url.strip()
    looks_like_url = (
        target_url.startswith(("http://", "https://"))
        and not any(ch.isspace() for ch in target_url)
    )

    if not looks_like_url:
        load_error = "Failed to load the website. Please check the URL."
    else:
        # Scrape website content
        with st.spinner("Scraping website..."):
            try:
                website = Website(target_url)
            except Exception as e:
                # Top-level UI boundary: report driver/page-load failures to
                # the user instead of crashing the script run.
                load_error = f"Failed to load the website: {e}"

    # Keep the original heuristic for error pages, tolerating a missing title.
    if load_error is None and "Error" in (website.title or ""):
        load_error = "Failed to load the website. Please check the URL."

    if load_error is not None:
        st.error(load_error)
    else:
        st.success("Website loaded successfully!")
        st.write(f"**Website Title:** {website.title}")

        # Call Groq API for processing
        st.write("Querying the website...")
        with st.spinner("Processing your query..."):
            try:
                chat_streaming = client.chat.completions.create(
                    messages=[
                        {"role": "system", "content": "You are a helpful assistant specializing in extracting and analyzing website content. Answer questions based on the provided website's content. Ensure responses are clear, concise, and formatted in Markdown for better readability."},
                        {"role": "user", "content": f"{user_query} Here's the content:\n{website.text}"}
                    ],
                    model="llama3-groq-70b-8192-tool-use-preview",
                    temperature=0.3,
                    max_tokens=1200,
                    top_p=1,
                    stream=True,
                )
            except Exception as e:
                st.error(f"Failed to process query: {e}")
            else:
                # Only consume the stream when the request was accepted;
                # otherwise chat_streaming would be unbound and a second,
                # misleading error would be shown.
                st.write('Passed model')
                try:
                    # Delta content may be None, so skip empty chunks.
                    pieces = [
                        chunk.choices[0].delta.content
                        for chunk in chat_streaming
                        if chunk.choices[0].delta.content
                    ]
                    response = "".join(pieces)
                    st.write("### Response:")
                    st.write(response)
                except Exception as e:
                    st.error(f"Failed to process query: {e}")