File size: 3,691 Bytes
71c801a
 
 
 
 
8f012b4
8dba809
 
 
 
 
 
daf469b
8dba809
daf469b
 
 
8dba809
71c801a
8dba809
 
 
 
71c801a
 
8dba809
 
 
 
 
 
daf469b
8dba809
 
 
 
 
 
 
 
 
 
 
 
71c801a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1f80fc1
d308e6d
 
 
598294b
b2692e3
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
import streamlit as st
from bs4 import BeautifulSoup
import requests
from groq import Groq

# The Website class defined below uses Selenium so that pages which rely heavily on JavaScript are rendered before parsing.
# Download the ChromeDriver build that matches your installed Chrome version from
# https://developer.chrome.com/docs/chromedriver/downloads
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
import os

# Both locations are resolved against the directory the app is launched from.
_launch_dir = os.getcwd()

# ChromeDriver executable handed to the Selenium Service.
PATH_TO_CHROME_DRIVER = os.path.join(_launch_dir, "chromedriver.exe")

# Destination passed to the Selenium Service as its log output.
log_file = os.path.join(_launch_dir, "logs")

class Website:
    """Snapshot of a web page rendered by Chrome through Selenium.

    Constructing an instance launches Chrome, loads ``url``, captures the
    rendered page source, closes the browser, and then extracts the page
    title and visible text with BeautifulSoup.

    Attributes:
        url: The address that was requested.
        title: Text of the ``<title>`` element, or ``"No title found"`` when
            the element is missing or has no single text value.
        text: Page text with ``script``, ``style``, ``img`` and ``input``
            elements removed, one fragment per line.

    Exceptions raised while starting Chrome or loading the page are not
    caught here; they propagate to the caller.
    """

    url: str
    title: str
    text: str

    def __init__(self, url):
        self.url = url

        # NOTE(review): these flags are typically needed when Chrome runs
        # inside a container; confirm they are wanted for your deployment.
        options = Options()
        options.add_argument("--no-sandbox")
        options.add_argument("--disable-dev-shm-usage")

        service = Service(executable_path=PATH_TO_CHROME_DRIVER, log_output=log_file)
        # The options must be handed to the driver, otherwise the arguments
        # configured above never reach Chrome.
        driver = webdriver.Chrome(service=service, options=options)
        try:
            driver.get(url)
            # NOTE: if a site shows a manual verification step, the page
            # source captured here is taken before it can be completed.
            page_source = driver.page_source
        finally:
            # Always shut the browser down, even when loading the page fails,
            # so no Chrome/ChromeDriver process is left behind.
            driver.quit()

        soup = BeautifulSoup(page_source, "html.parser")
        title_tag = soup.title
        # ``.string`` is None when <title> is empty or contains nested tags;
        # fall back so ``title`` is always a str.
        if title_tag and title_tag.string:
            self.title = title_tag.string
        else:
            self.title = "No title found"

        # Drop elements that carry no readable content before extracting text.
        for irrelevant in soup(["script", "style", "img", "input"]):
            irrelevant.decompose()
        self.text = soup.get_text(separator="\n", strip=True)

# Initialize Groq client
# The API key is read from the GROQ_API_KEY environment variable so that no
# secret is stored in the source file.
# SECURITY: a key was previously committed in this file; it should be treated
# as compromised and revoked in the Groq console.
api_key = os.environ.get("GROQ_API_KEY")
if not api_key:
    # Without a key every request would fail; tell the user and halt this run.
    st.error("GROQ_API_KEY is not set. Set the environment variable and reload the app.")
    st.stop()
client = Groq(api_key=api_key)

# Streamlit UI
st.title("Welcome to WebBot")
st.write("Enter a website URL and ask questions about its content!")

# Input fields
# The example text is shown as a placeholder rather than as the field's value,
# so it is never submitted as if it were a real URL. Surrounding whitespace is
# stripped because it is not part of a valid address.
url = st.text_input("Website URL:", placeholder="https://example.com  or .ac.ke ").strip()
user_query = st.text_area("What would you like to know about this website")

# Submit handler: scrape the page, then ask the model about its content.
if st.button("Submit"):
    # Scrape website content
    website = None
    with st.spinner("Scraping website..."):
        try:
            website = Website(url)
        except Exception as e:  # UI boundary: report the failure instead of a traceback
            st.error(f"Failed to load the website. Please check the URL. ({e})")

    # Guard against a missing title so the membership test below cannot raise.
    page_title = (website.title or "No title found") if website is not None else ""

    if website is None:
        # The scraping error has already been reported above.
        pass
    elif "Error" in page_title:
        st.error("Failed to load the website. Please check the URL.")
    else:
        st.success("Website loaded successfully!")
        st.write(f"**Website Title:** {page_title}")

        # Call Groq API for processing
        st.write("Querying the website...")
        with st.spinner("Processing your query..."):
            # Creating the stream and consuming it share one try block: if the
            # request fails there is no stream to read, and only one error is
            # shown to the user.
            try:
                chat_streaming = client.chat.completions.create(
                    messages=[
                        {"role": "system", "content": "You are a helpful assistant specializing in extracting and analyzing website content. Answer questions based on the provided website's content. Ensure responses are clear, concise, and formatted in Markdown for better readability."},
                        {"role": "user", "content": f"{user_query} Here's the content:\n{website.text}"}
                    ],
                    # NOTE(review): preview model identifiers may be retired by
                    # the provider; confirm this one is still available.
                    model="llama3-groq-70b-8192-tool-use-preview",
                    temperature=0.3,
                    max_tokens=1200,
                    top_p=1,
                    stream=True,
                )
                # Chunks may carry no content (None); those are skipped.
                response = "".join(
                    chunk.choices[0].delta.content or "" for chunk in chat_streaming
                )
            except Exception as e:
                st.error(f"Failed to process query: {e}")
            else:
                st.write("### Response:")
                st.write(response)