Koomemartin committed on
Commit
24f1a4f
·
verified ·
1 Parent(s): e71a975

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -12
app.py CHANGED
@@ -6,7 +6,7 @@ import os
6
  from dotenv import load_dotenv
7
  import json
8
 
9
-
10
  class Website:
11
  """
12
  A utility class to represent a Website that we have scraped, now with links
@@ -33,9 +33,10 @@ class Website:
33
 
34
  # First, let's get the relevant links from the home page to gather broad information about the provided website
35
 
 
36
  link_system_prompt = "You are provided with a list of links found on a webpage. \
37
- You are able to decide which of the links would be most relevant to the company, \
38
- such as links to an About page, or a Company page, or Careers/Jobs pages.\n"
39
  link_system_prompt += "You should respond in JSON as in this example: \n"
40
  link_system_prompt += """
41
  {
@@ -46,15 +47,16 @@ link_system_prompt += """
46
  }
47
  """
48
 
 
49
  def get_links_user_prompt(website):
50
  user_prompt = f"Here is the list of links on the website of {website.url} - "
51
- user_prompt += "please decide which of these are relevant web links about the company, respond with the full https URL in JSON format. \
52
- Do not include Terms of Service, Privacy\n"
53
  user_prompt += "Links (some might be relative links):\n"
54
  user_prompt += "\n".join(website.links)
55
  return user_prompt
56
 
57
-
58
  def get_links(url):
59
  website = Website(url)
60
  response = client.chat.completions.create(
@@ -71,6 +73,7 @@ def get_links(url):
71
  result = response.choices[0].message.content
72
  return json.loads(result)
73
 
 
74
  @st.cache_resource
75
  def get_all_details(url):
76
  result = "Home page:\n"
@@ -97,8 +100,6 @@ url = st.text_input("Website URL:", " " )
97
  user_query = st.text_area("What would you like to know about this website")
98
 
99
  if user_query:
100
-
101
-
102
  # Scrape website content
103
  with st.spinner("Scraping website..."):
104
 
@@ -108,15 +109,14 @@ if user_query:
108
  except Exception as e:
109
  st.error(f"Failed to load website: {e}")
110
 
111
-
112
- # Call Groq API for processing
113
  st.write("Querying the website...")
114
  with st.spinner("Processing your query..."):
115
  try:
116
  chat_streaming = client.chat.completions.create(
117
  messages=[
118
- {"role": "system", "content": "You are a helpful assistant specializing in extracting and analyzing website content. Answer questions based on the provided website's content. Ensure responses are clear, concise, and formatted in Markdown for better readability. use your knowledge to add relevant inforation to thr users query"},
119
- {"role": "user", "content": f"{user_query} \n Here's the content:\n{website}"}
120
  ],
121
  model="llama3-groq-70b-8192-tool-use-preview",
122
  temperature=0.9,
 
6
  from dotenv import load_dotenv
7
  import json
8
 
9
+ # scraping pipeline
10
  class Website:
11
  """
12
  A utility class to represent a Website that we have scraped, now with links
 
33
 
34
  # First, let's get the relevant links from the home page to gather broad information about the provided website
35
 
36
+ # System prompt for the first API call (selecting the relevant links)
37
  link_system_prompt = "You are provided with a list of links found on a webpage. \
38
+ You are able to decide which of the links would be most relevant to the website, \
39
+ such as links to an About page, or a Company page, or Careers/Jobs pages. Kindly choose the top seven links that look to provide more information about the website\n"
40
  link_system_prompt += "You should respond in JSON as in this example: \n"
41
  link_system_prompt += """
42
  {
 
47
  }
48
  """
49
 
50
+ # Predefined user prompt asking the model to extract only the important links about the website
51
  def get_links_user_prompt(website):
52
  user_prompt = f"Here is the list of links on the website of {website.url} - "
53
+ user_prompt += "please decide which of these are relevant web links to the website, respond with the full https URL in JSON format. \
54
+ Do not include Terms of Service, Privacy\n"
55
  user_prompt += "Links (some might be relative links):\n"
56
  user_prompt += "\n".join(website.links)
57
  return user_prompt
58
 
59
+ # Make the first API call to get the important links
60
  def get_links(url):
61
  website = Website(url)
62
  response = client.chat.completions.create(
 
73
  result = response.choices[0].message.content
74
  return json.loads(result)
75
 
76
+ # Gathers all the website content needed to answer the user's query
77
  @st.cache_resource
78
  def get_all_details(url):
79
  result = "Home page:\n"
 
100
  user_query = st.text_area("What would you like to know about this website")
101
 
102
  if user_query:
 
 
103
  # Scrape website content
104
  with st.spinner("Scraping website..."):
105
 
 
109
  except Exception as e:
110
  st.error(f"Failed to load website: {e}")
111
 
112
+ # Second call to the Groq API to process the user's query
 
113
  st.write("Querying the website...")
114
  with st.spinner("Processing your query..."):
115
  try:
116
  chat_streaming = client.chat.completions.create(
117
  messages=[
118
+ {"role": "system", "content": "You are a helpful assistant specializing in extracting and analyzing website content. Provide information required by the user based on the website information provided. Ensure responses are clear, concise, and formatted in Markdown for better readability. use your knowledge to add relevant inforation to the users query"},
119
+ {"role": "user", "content": f"Here's the content to use:\n{website} \n Know respond appropriately: {user_query}"}
120
  ],
121
  model="llama3-groq-70b-8192-tool-use-preview",
122
  temperature=0.9,