"""WebBot: a Streamlit app that scrapes a (possibly JavaScript-heavy) website
with Selenium + ChromeDriver, then answers questions about its content via the
Groq chat-completions API.

Download the ChromeDriver matching your Chrome version from:
https://developer.chrome.com/docs/chromedriver/downloads
"""

import os

import requests  # noqa: F401  (kept from original; may be used by other tooling)
import streamlit as st
from bs4 import BeautifulSoup
from groq import Groq
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By  # noqa: F401  (kept from original)

# ChromeDriver binary and driver log file, both resolved relative to the CWD.
PATH_TO_CHROME_DRIVER = os.path.join(os.getcwd(), 'chromedriver.exe')
log_file = os.path.join(os.getcwd(), 'logs')


class Website:
    """A fetched webpage: its URL, title, and visible text content.

    Uses a real Chrome browser (via Selenium) so JavaScript-rendered pages
    are captured, unlike a plain ``requests.get``.
    """

    url: str
    title: str
    text: str

    def __init__(self, url):
        self.url = url
        options = Options()
        options.add_argument("--no-sandbox")
        options.add_argument("--disable-dev-shm-usage")
        service = Service(executable_path=PATH_TO_CHROME_DRIVER, log_output=log_file)
        # FIX: the original built `options` but never passed it to Chrome,
        # so the flags above silently had no effect.
        driver = webdriver.Chrome(service=service, options=options)
        try:
            driver.get(url)
            # input("Please complete the verification in the browser and press Enter to continue...")
            page_source = driver.page_source
        finally:
            # FIX: always release the browser process, even if get() raises.
            driver.quit()

        soup = BeautifulSoup(page_source, 'html.parser')
        self.title = soup.title.string if soup.title else "No title found"
        # Drop non-content elements before extracting visible text.
        for irrelevant in soup(["script", "style", "img", "input"]):
            irrelevant.decompose()
        self.text = soup.get_text(separator="\n", strip=True)


# SECURITY FIX: the API key was previously hard-coded in this file. A key
# committed to source control must be treated as compromised and rotated;
# load it from the environment instead.
api_key = os.environ.get("GROQ_API_KEY", "")
if not api_key:
    st.warning("GROQ_API_KEY environment variable is not set; queries will fail.")
client = Groq(api_key=api_key)

# ----- Streamlit UI -----
st.title("Welcome to WebBot")
st.write("Enter a website URL and ask questions about its content!")

# Input fields
url = st.text_input("Website URL:", "https://example.com or .ac.ke ")
user_query = st.text_area("What would you like to know about this website")

if st.button("Submit"):
    # Scrape website content.
    with st.spinner("Scraping website..."):
        website = Website(url)
    if "Error" in website.title:
        st.error("Failed to load the website. Please check the URL.")
    else:
        st.success("Website loaded successfully!")
        st.write(f"**Website Title:** {website.title}")

        # Call Groq API for processing.
        st.write("Querying the website...")
        chat_streaming = None  # FIX: original raised NameError below if create() failed
        with st.spinner("Processing your query..."):
            try:
                chat_streaming = client.chat.completions.create(
                    messages=[
                        {
                            "role": "system",
                            "content": (
                                "You are a helpful assistant specializing in extracting and "
                                "analyzing website content. Answer questions based on the "
                                "provided website's content. Ensure responses are clear, "
                                "concise, and formatted in Markdown for better readability."
                            ),
                        },
                        {
                            "role": "user",
                            "content": f"{user_query} Here's the content:\n{website.text}",
                        },
                    ],
                    model="llama3-groq-70b-8192-tool-use-preview",
                    temperature=0.3,
                    max_tokens=1200,
                    top_p=1,
                    stream=True,
                )
                st.write('Passed model')
            except Exception as e:
                st.error(f"Failed to process query: {e}")

        # Accumulate the streamed chunks into one Markdown response.
        if chat_streaming is not None:
            response = ""
            try:
                for chunk in chat_streaming:
                    content = chunk.choices[0].delta.content
                    if content:  # Ensure content is not None
                        response += content
                st.write("### Response:")
                st.write(response)
            except Exception as e:
                st.error(f"Failed to process query: {e}")