Koomemartin committed on
Commit
9eaae75
·
verified ·
1 Parent(s): 4028fa6

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +166 -0
app.py ADDED
@@ -0,0 +1,166 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from bs4 import BeautifulSoup
3
+ import requests
4
+ from groq import Groq
5
+ import os
6
+ from dotenv import load_dotenv
7
+ import json
8
+
9
+ # scraping pipeline
10
class Website:
    """
    A scraped web page: its URL, raw body, title, visible text and links.

    Constructing an instance performs the HTTP request and parses the result.
    """

    def __init__(self, url, timeout=10):
        """
        Download ``url`` and extract its title, visible text and anchor hrefs.

        Args:
            url: Address of the page to fetch.
            timeout: Seconds passed to ``requests.get`` so that a server which
                never answers cannot block the app indefinitely.

        Raises:
            Nothing is caught here; any exception raised by ``requests.get``
            propagates to the caller.
        """
        self.url = url
        response = requests.get(url, timeout=timeout)
        self.body = response.content
        soup = BeautifulSoup(self.body, 'html.parser')

        # ``soup.title.string`` is None when <title> is empty or contains
        # nested tags; fall back instead of storing None as the title.
        title = soup.title.string if soup.title else None
        self.title = title if title else "No title found"

        if soup.body:
            # Drop elements that carry no readable text before extracting it.
            for irrelevant in soup.body(["script", "style", "img", "input"]):
                irrelevant.decompose()
            self.text = soup.body.get_text(separator="\n", strip=True)
        else:
            self.text = ""

        # Every non-empty href found on the page (may include relative links).
        hrefs = (anchor.get('href') for anchor in soup.find_all('a'))
        self.links = [href for href in hrefs if href]

    def get_contents(self):
        """Return the title and visible text formatted for use in a prompt."""
        return f"Webpage Title:\n{self.title}\nWebpage Contents:\n{self.text}\n\n"
32
+
33
+
34
+ # First, get the relevant links from the home page to gather broad information about the provided website
35
+
36
+ # system prompt of the first call
37
link_system_prompt = (
    "You are provided with a list of links found on a webpage. "
    "You are able to decide which of the links would be most relevant to include in a brochure about the company, "
    "such as links to an About page, or a Company page, or Careers/Jobs pages.\n"
)
link_system_prompt += "You should respond in JSON as in this example:"
# The example has to be valid JSON itself: the reply is requested as a JSON
# object and parsed with json.loads, so a malformed sample invites replies
# that fail to parse.
link_system_prompt += """
{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page", "url": "https://another.full.url/careers"},
        {"type": "contact us", "url": "mailto:info@example.com"}
    ]
}
"""
50
+
51
+ # Predefined user prompt used to extract only the important links from the website
52
def get_links_user_prompt(website):
    """
    Build the user prompt asking the model to pick the relevant links.

    Args:
        website: Object exposing ``url`` (str) and ``links`` (iterable of
            href strings), e.g. a ``Website`` instance.

    Returns:
        The prompt text: the instructions followed by one link per line.
    """
    # The same href can occur several times in ``website.links``; listing it
    # once is enough. dict.fromkeys keeps the first-seen order.
    unique_links = dict.fromkeys(website.links)
    # Adjacent pieces are joined explicitly rather than via a backslash
    # continuation inside a string literal, which would pull any source
    # indentation into the prompt text.
    return "".join([
        f"Here is the list of links on the website of {website.url} - ",
        "please decide which of these are relevant web links to the website, respond with the full https URL in JSON format. ",
        "Do not include Terms of Service, Privacy\n",
        "Links (some might be relative links):\n",
        "\n".join(unique_links),
    ])
59
+
60
+ # make the first call to get the important links
61
def get_links(url):
    """
    Ask the model which links found on ``url`` belong in a brochure.

    Args:
        url: Address of the page whose links are offered to the model.

    Returns:
        A dict with a ``"links"`` list; every entry is a dict with a string
        ``"type"`` key and an absolute ``"url"``. Entries the model returned
        without a usable URL are dropped.

    Raises:
        Exceptions from fetching the page, from the Groq client, or from
        ``json.loads`` are not caught here and propagate to the caller.
    """
    # Local import keeps this block self-contained (stdlib only).
    from urllib.parse import urljoin

    website = Website(url)
    response = client.chat.completions.create(
        messages=[
            {"role": "system", "content": link_system_prompt},
            {"role": "user", "content": get_links_user_prompt(website)},
        ],
        model="llama3-groq-70b-8192-tool-use-preview",
        temperature=1,
        max_tokens=2048,
        stop=None,
        stream=False,
        response_format={"type": "json_object"},
    )
    parsed = json.loads(response.choices[0].message.content)

    # The reply is model output: do not assume it follows the requested shape.
    if not isinstance(parsed, dict):
        parsed = {}
    candidates = parsed.get("links")
    if not isinstance(candidates, list):
        candidates = []

    cleaned = []
    for entry in candidates:
        if not isinstance(entry, dict):
            continue
        target = entry.get("url")
        if not isinstance(target, str) or not target.strip():
            continue
        normalised = dict(entry)
        normalised["type"] = str(entry.get("type", "page"))
        # The page may list relative hrefs; resolve them against the page
        # URL. Absolute URLs are returned unchanged by urljoin.
        normalised["url"] = urljoin(url, target.strip())
        cleaned.append(normalised)

    parsed["links"] = cleaned
    return parsed
76
+
77
+ # Collect all the content required to generate information about the website for the user
78
# The return value is plain text, which is what st.cache_data is intended
# for; st.cache_resource targets shared objects such as clients or models.
@st.cache_data
def get_all_details(url):
    """
    Collect the text of the home page and of every relevant linked page.

    Args:
        url: Address of the website's home page.

    Returns:
        One string: the home page contents followed by a section per linked
        page, each headed by the link's type.

    Raises:
        Failures fetching the home page or selecting the links propagate.
        A failure fetching an individual linked page is logged and that
        page is skipped.
    """
    sections = ["Home page:\n", Website(url).get_contents()]
    links = get_links(url)
    print("Available links:", links)
    for link in links.get("links", []):
        target = link.get("url")
        if not target:
            continue
        try:
            page = Website(target)
        except requests.RequestException as exc:
            # e.g. a mailto: link or an unreachable host; one bad link
            # should not discard the pages that were fetched successfully.
            print(f"Skipping {target}: {exc}")
            continue
        sections.append(f"\n\n{link.get('type', 'page')}\n")
        sections.append(page.get_contents())
    return "".join(sections)
88
+
89
+
90
# Default system prompt for the brochure-writing call. Built from adjacent
# literals so that every sentence is separated by exactly one space (a
# backslash continuation directly after "markdown." would fuse it with the
# next sentence).
system_prompt = (
    "You are an assistant that analyzes the contents of several relevant pages from a company website "
    "and creates a short brochure about the company for prospective customers, investors and recruits. Respond in markdown. "
    "Include details of company culture, customers and careers/jobs if you have the information."
)
93
+
94
def second_call_sytem_prompt(system=""):
    """
    Choose the system prompt for the brochure-writing call.

    Args:
        system: Custom system prompt typed by the user. Optional, so the
            function can also be called without arguments.

    Returns:
        The custom prompt with surrounding whitespace removed, or the
        module-level ``system_prompt`` when the custom prompt is missing,
        empty or whitespace-only.
    """
    # A text box left "blank" can still hold whitespace, so test the
    # stripped value rather than the raw length.
    custom = (system or "").strip()
    if not custom:
        return system_prompt
    return custom
99
+
100
+
101
def get_brochure_user_prompt(company_name, url, max_chars=20_000):
    """
    Build the user prompt for the brochure-writing call.

    Args:
        company_name: Name of the company (or brochure title) to present.
        url: Address of the company's home page; its contents and those of
            the relevant linked pages are fetched via ``get_all_details``.
        max_chars: Maximum length of the returned prompt, in characters.

    Returns:
        The instructions followed by the scraped contents, cut off after
        ``max_chars`` characters.
    """
    user_prompt = "".join([
        f"You are looking at a company called: {company_name}\n",
        "Here are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown and provide usable links in the contacts areas \n",
        get_all_details(url),
    ])
    # Truncate so the prompt stays within the size the model call can take.
    return user_prompt[:max_chars]
107
+
108
# Initialize Groq client
# load_dotenv()  # left disabled as in the original; the key is read from the environment
api_key = os.getenv('GROQ_API_KEY')
client = Groq(api_key=api_key)

# Streamlit UI
st.title("AI Brochures 🎨📌")
st.write("Create a captivating brochure of your company or institution by only using information from your website!!")

# Input fields. Defaults are empty strings so "nothing entered" is falsy.
system = st.text_input(
    "Modify the model response using a custom system prompt if not satisfied with generated response:",
    "",
)
url = st.text_input("Provide the Company's website URL:", "")
user_query = st.text_area("Provide a title for the brochure or the name of the organization")

# Strip stray whitespace so blank-looking input is treated as blank.
system = system.strip()
url = url.strip()
user_query = user_query.strip()

if user_query and not url:
    st.warning("Please provide the company's website URL.")
elif user_query:
    # Step 1: scrape the website and build the user prompt.
    second_user_prompt = None
    with st.spinner("Scraping website..."):
        try:
            second_user_prompt = get_brochure_user_prompt(user_query, url)
        except Exception as e:
            # UI boundary: report the failure instead of crashing the app.
            st.error(f"Failed to load website: {e}")
        else:
            st.success("Website loaded successfully!")

    # Step 2: only query the model when scraping produced a prompt.
    if second_user_prompt is not None:
        st.write("Querying the website...")
        chat_completion = None
        with st.spinner("Processing your query..."):
            try:
                chat_completion = client.chat.completions.create(
                    messages=[
                        {"role": "system", "content": second_call_sytem_prompt(system)},
                        {"role": "user", "content": second_user_prompt},
                    ],
                    model="llama3-groq-70b-8192-tool-use-preview",
                    temperature=0.8,
                    max_tokens=2042,
                    top_p=0.6,
                    stream=False,
                )
            except Exception as e:
                st.error(f"Failed to process query to model: {e}")

        # Step 3: show the brochure when the model call succeeded.
        if chat_completion is not None:
            try:
                response = chat_completion.choices[0].message.content
            except (AttributeError, IndexError) as e:
                st.error(f"Failed to process query: {e}")
            else:
                st.write("🤖:")
                st.write(response)

# Footer, rendered on every run.
st.markdown("--------------")
st.write("© 2024 Application")