Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| from bs4 import BeautifulSoup | |
| import requests | |
| from groq import Groq | |
| # Define the Website class added | |
| #Parse webpages which is designed using JavaScript heavely | |
| # download the chorme driver from here as per your version of chrome - https://developer.chrome.com/docs/chromedriver/downloads | |
| from selenium import webdriver | |
| from selenium.webdriver.chrome.service import Service | |
| from selenium.webdriver.common.by import By | |
| from selenium.webdriver.chrome.options import Options | |
| import os | |
# Filesystem locations Selenium needs, both resolved against the current
# working directory: the chromedriver binary and its log file.
_cwd = os.getcwd()
PATH_TO_CHROME_DRIVER = os.path.join(_cwd, 'chromedriver.exe')
log_file = os.path.join(_cwd, 'logs')
class Website:
    """Fetch a URL with Selenium-driven Chrome and extract its title and text.

    Attributes:
        url: The URL that was requested.
        title: The page <title> text, "No title found" if absent, or an
            "Error: ..." message when the page could not be loaded (the UI
            detects failures by checking for "Error" in this field).
        text: Visible page text with script/style/img/input nodes removed.
    """

    url: str
    title: str
    text: str

    def __init__(self, url):
        self.url = url
        self.text = ""
        driver = None
        try:
            options = Options()
            options.add_argument("--no-sandbox")
            options.add_argument("--disable-dev-shm-usage")
            service = Service(executable_path=PATH_TO_CHROME_DRIVER, log_output=log_file)
            # Bug fix: the configured Options were previously never passed to
            # Chrome, so the --no-sandbox/--disable-dev-shm-usage flags were dead.
            driver = webdriver.Chrome(service=service, options=options)
            driver.get(url)
            page_source = driver.page_source
        except Exception as e:
            # Surface the failure through `title` instead of crashing the app;
            # the Streamlit UI checks for "Error" in the title.
            self.title = f"Error: {e}"
            return
        finally:
            # Always release the browser, even when driver.get() raises
            # (the original leaked the Chrome process on failure).
            if driver is not None:
                driver.quit()
        soup = BeautifulSoup(page_source, 'html.parser')
        self.title = soup.title.string if soup.title else "No title found"
        # Strip non-content nodes before extracting the readable text.
        for irrelevant in soup(["script", "style", "img", "input"]):
            irrelevant.decompose()
        self.text = soup.get_text(separator="\n", strip=True)
# Initialize Groq client.
# Security fix: the API key was hard-coded in source (a committed secret).
# Read it from the environment instead and fail fast with a clear message.
api_key = os.environ.get("GROQ_API_KEY")
if not api_key:
    st.error("GROQ_API_KEY environment variable is not set.")
    st.stop()
client = Groq(api_key=api_key)
# Streamlit UI
st.title("Welcome to WebBot")
st.write("Enter a website URL and ask questions about its content!")

# Input fields.
# Bug fix: the example URL was the *default value* ("https://example.com or
# .ac.ke "), so clicking Submit without typing scraped that literal junk
# string. A placeholder shows the hint without ever being submitted.
url = st.text_input("Website URL:", placeholder="https://example.com")
user_query = st.text_area("What would you like to know about this website")
if st.button("Submit"):
    # Scrape website content.
    with st.spinner("Scraping website..."):
        website = Website(url)

    if "Error" in website.title:
        st.error("Failed to load the website. Please check the URL.")
    else:
        st.success("Website loaded successfully!")
        st.write(f"**Website Title:** {website.title}")

        # Call Groq API for processing.
        st.write("Querying the website...")
        with st.spinner("Processing your query..."):
            # Bug fix: previously a failed create() call was caught, but
            # execution still fell through to the streaming loop and raised
            # NameError on the never-assigned `chat_streaming`. One try block
            # now covers both the request and the consumption of the stream.
            try:
                chat_streaming = client.chat.completions.create(
                    messages=[
                        {"role": "system", "content": "You are a helpful assistant specializing in extracting and analyzing website content. Answer questions based on the provided website's content. Ensure responses are clear, concise, and formatted in Markdown for better readability."},
                        {"role": "user", "content": f"{user_query} Here's the content:\n{website.text}"},
                    ],
                    model="llama3-groq-70b-8192-tool-use-preview",
                    temperature=0.3,
                    max_tokens=1200,
                    top_p=1,
                    stream=True,
                )
                # Accumulate the streamed completion; a chunk's delta content
                # may be None, so skip those.
                response = ""
                for chunk in chat_streaming:
                    content = chunk.choices[0].delta.content
                    if content:
                        response += content
                st.write("### Response:")
                st.write(response)
            except Exception as e:
                st.error(f"Failed to process query: {e}")