Spaces:
Runtime error
Runtime error
import streamlit as st | |
from bs4 import BeautifulSoup | |
import requests | |
from groq import Groq | |
# Website class (defined below): parses webpages that rely heavily on JavaScript.
# Download the ChromeDriver that matches your Chrome version from
# https://developer.chrome.com/docs/chromedriver/downloads
from selenium import webdriver | |
from selenium.webdriver.chrome.service import Service | |
from selenium.webdriver.common.by import By | |
from selenium.webdriver.chrome.options import Options | |
import os | |
# Both paths are resolved against the directory the app was started from.
_BASE_DIR = os.getcwd()
# Location of the ChromeDriver executable handed to Selenium's Service.
PATH_TO_CHROME_DRIVER = os.path.join(_BASE_DIR, 'chromedriver.exe')
# Destination passed to Selenium's Service as its log output.
log_file = os.path.join(_BASE_DIR, 'logs')
class Website:
    """A web page rendered in Selenium-driven Chrome and reduced to plain text.

    Attributes:
        url: The address that was requested.
        title: Text of the page's ``<title>`` tag, or ``"No title found"``
            when the tag is missing or has no plain string content.
        text: The page text with ``script``, ``style``, ``img`` and ``input``
            elements removed, one fragment per line.
    """

    url: str
    title: str
    text: str

    def __init__(self, url):
        """Load *url* in Chrome and extract its title and text.

        Exceptions raised by Selenium while starting the browser or loading
        the page are not caught here; they propagate to the caller.
        """
        self.url = url

        options = Options()
        options.add_argument("--no-sandbox")
        options.add_argument("--disable-dev-shm-usage")

        service = Service(executable_path=PATH_TO_CHROME_DRIVER, log_output=log_file)
        # The options must be passed explicitly; otherwise the flags set
        # above are never applied to the browser session.
        driver = webdriver.Chrome(service=service, options=options)
        try:
            driver.get(url)
            page_source = driver.page_source
        finally:
            # Always shut the browser down, even when loading the page fails,
            # so no Chrome/ChromeDriver process is left behind.
            driver.quit()

        soup = BeautifulSoup(page_source, 'html.parser')

        # ``soup.title.string`` is None when the tag is empty or contains
        # nested markup, so fall back to the default in that case as well.
        title_text = soup.title.string if soup.title else None
        self.title = title_text if title_text else "No title found"

        # Remove elements that contribute no readable content.
        for irrelevant in soup(["script", "style", "img", "input"]):
            irrelevant.decompose()
        self.text = soup.get_text(separator="\n", strip=True)
# Initialize Groq client
# The API key is read from the GROQ_API_KEY environment variable so the secret
# is never stored in the source file.
# NOTE(review): any key that was previously committed in this file should be
# treated as compromised and revoked.
api_key = os.environ.get("GROQ_API_KEY")
if not api_key:
    st.error("GROQ_API_KEY is not set. Add it to the environment and reload the app.")
    # Halt this script run; nothing below can work without a client.
    st.stop()
client = Groq(api_key=api_key)
# Streamlit UI
st.title("Welcome to WebBot")
st.write("Enter a website URL and ask questions about its content!")

# Input fields
# The example address is shown as placeholder text instead of being the
# field's initial value, so an untouched field yields "" rather than a
# string that is not a loadable URL.
url = st.text_input("Website URL:", placeholder="https://example.com or .ac.ke ")
user_query = st.text_area("What would you like to know about this website")
# Submit handler: scrape the requested page, then ask the model about it.
if st.button("Submit"):
    target_url = url.strip()
    website = None
    load_error = None

    # Scrape website content (skipped when no URL was entered, to avoid
    # starting a browser for nothing).
    if target_url:
        with st.spinner("Scraping website..."):
            try:
                website = Website(target_url)
            except Exception as e:
                # UI boundary: Website raises on any browser/navigation
                # failure, so report it instead of crashing the app.
                load_error = e

    # ``title`` is guarded with ``or ""`` so a missing title cannot break the
    # substring check.
    if website is None or "Error" in (website.title or ""):
        st.error("Failed to load the website. Please check the URL.")
        if load_error is not None:
            st.write(f"Details: {load_error}")
    else:
        st.success("Website loaded successfully!")
        st.write(f"**Website Title:** {website.title}")

        # Call Groq API for processing
        st.write("Querying the website...")
        with st.spinner("Processing your query..."):
            # The request and the consumption of its stream share one ``try``
            # so a failed request is reported once, and the stream is never
            # read when it was not created.
            try:
                chat_streaming = client.chat.completions.create(
                    messages=[
                        {"role": "system", "content": "You are a helpful assistant specializing in extracting and analyzing website content. Answer questions based on the provided website's content. Ensure responses are clear, concise, and formatted in Markdown for better readability."},
                        {"role": "user", "content": f"{user_query} Here's the content:\n{website.text}"}
                    ],
                    model="llama3-groq-70b-8192-tool-use-preview",
                    temperature=0.3,
                    max_tokens=1200,
                    top_p=1,
                    stream=True,
                )
                st.write('Passed model')
                # Chunks whose delta carries no content (None) are skipped.
                response = "".join(
                    chunk.choices[0].delta.content or ""
                    for chunk in chat_streaming
                )
            except Exception as e:
                st.error(f"Failed to process query: {e}")
            else:
                st.write("### Response:")
                st.write(response)