Spaces:
Running
Running
File size: 3,691 Bytes
71c801a 8f012b4 8dba809 daf469b 8dba809 daf469b 8dba809 71c801a 8dba809 71c801a 8dba809 daf469b 8dba809 71c801a 1f80fc1 d308e6d 598294b b2692e3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 |
import streamlit as st
from bs4 import BeautifulSoup
import requests
from groq import Groq
# The Website class defined below loads pages through Selenium.
# This lets it parse web pages that rely heavily on JavaScript.
# Download the Chrome driver that matches your version of Chrome from here - https://developer.chrome.com/docs/chromedriver/downloads
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
import os
# Both locations are resolved once, at import time, against the directory the
# app was started from.
_working_dir = os.getcwd()
# ChromeDriver executable handed to the Selenium Service.
PATH_TO_CHROME_DRIVER = os.path.join(_working_dir, 'chromedriver.exe')
# Destination passed to the Selenium Service as its log output.
log_file = os.path.join(_working_dir, 'logs')
class Website:
    """Snapshot of a web page rendered in Chrome through Selenium.

    The page is loaded in a real browser so that markup produced by
    JavaScript is present in the HTML that gets parsed.

    Attributes:
        url: The address that was requested.
        title: Text of the page's ``<title>`` element, or
            ``"No title found"`` when it is missing or has no plain text.
        text: Text of the page with ``script``, ``style``, ``img`` and
            ``input`` elements removed, fragments separated by newlines.
    """

    url: str
    title: str
    text: str

    def __init__(self, url):
        """Load *url* in Chrome and extract its title and text.

        Args:
            url: Address handed to the browser.

        Raises:
            Exception: Whatever Selenium raises while starting Chrome or
                loading the page is propagated unchanged; the browser is
                shut down first.
        """
        self.url = url

        options = Options()
        options.add_argument("--no-sandbox")
        options.add_argument("--disable-dev-shm-usage")
        service = Service(executable_path=PATH_TO_CHROME_DRIVER, log_output=log_file)
        # The options must be passed to the driver; otherwise the flags
        # configured above are never applied.
        driver = webdriver.Chrome(service=service, options=options)
        try:
            driver.get(url)
            # NOTE(review): pages that require interactive verification in
            # the browser are not handled here.
            page_source = driver.page_source
        finally:
            # Always shut the browser down, even when loading fails, so no
            # Chrome process is left behind.
            driver.quit()

        soup = BeautifulSoup(page_source, 'html.parser')

        # ``.string`` is None when <title> is empty or contains nested tags;
        # fall back to the placeholder so ``title`` is always a str.
        title_text = soup.title.string if soup.title else None
        self.title = str(title_text) if title_text else "No title found"

        # Drop elements that carry no readable content before extracting text.
        for irrelevant in soup(["script", "style", "img", "input"]):
            irrelevant.decompose()
        self.text = soup.get_text(separator="\n", strip=True)
# Streamlit UI
st.title("Welcome to WebBot")
st.write("Enter a website URL and ask questions about its content!")

# Initialize Groq client.
# The API key is a secret and must never be committed to source control; it
# is read from the GROQ_API_KEY environment variable instead.
api_key = os.environ.get("GROQ_API_KEY")
if not api_key:
    st.error("GROQ_API_KEY is not set. Add it to the environment and reload the app.")
    st.stop()
client = Groq(api_key=api_key)

# Input fields
url = st.text_input("Website URL:", "https://example.com or .ac.ke ")
user_query = st.text_area("What would you like to know about this website")

if st.button("Submit"):
    # Scrape website content. Any failure while starting the browser or
    # loading the page is reported to the user instead of crashing the app.
    with st.spinner("Scraping website..."):
        try:
            website = Website(url.strip())
        except Exception:
            website = None

    if website is None or "Error" in website.title:
        st.error("Failed to load the website. Please check the URL.")
    else:
        st.success("Website loaded successfully!")
        st.write(f"**Website Title:** {website.title}")

        # Call Groq API for processing. Creating the stream and consuming it
        # share one try block so a failed request is reported exactly once.
        st.write("Querying the website...")
        with st.spinner("Processing your query..."):
            try:
                chat_streaming = client.chat.completions.create(
                    messages=[
                        {"role": "system", "content": "You are a helpful assistant specializing in extracting and analyzing website content. Answer questions based on the provided website's content. Ensure responses are clear, concise, and formatted in Markdown for better readability."},
                        {"role": "user", "content": f"{user_query} Here's the content:\n{website.text}"},
                    ],
                    model="llama3-groq-70b-8192-tool-use-preview",
                    temperature=0.3,
                    max_tokens=1200,
                    top_p=1,
                    stream=True,
                )
                st.write('Passed model')
                # Chunks may carry no content (None); treat those as empty.
                response = "".join(
                    chunk.choices[0].delta.content or ""
                    for chunk in chat_streaming
                )
            except Exception as e:
                st.error(f"Failed to process query: {e}")
            else:
                st.write("### Response:")
                st.write(response)