siddhartharya committed on
Commit
a4303b2
·
verified ·
1 Parent(s): 314bf31

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +175 -11
app.py CHANGED
@@ -1,4 +1,5 @@
1
  # app.py
 
2
  import gradio as gr
3
  from bs4 import BeautifulSoup
4
  import requests
@@ -14,37 +15,200 @@ index = None
14
  bookmarks = []
15
  fetch_cache = {}
16
 
17
- # Helper functions as defined above...
18
 
19
  def parse_bookmarks(file_content):
20
- # [Code from Step 4.1]
 
 
 
 
 
 
 
21
 
22
  def fetch_url_info(bookmark):
23
- # [Code from Step 4.2]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
 
25
  def generate_summary(bookmark):
26
- # [Code from Step 4.3]
 
 
 
 
 
 
 
 
27
 
28
  def vectorize_and_index(bookmarks):
29
- # [Code from Step 4.4]
 
 
 
 
 
30
 
31
  def process_uploaded_file(file):
32
- # [Code from Step 5.1]
 
 
 
 
 
 
 
 
 
 
 
 
33
 
34
  def chatbot_response(user_query):
35
- # [Code from Step 5.2]
 
 
 
 
 
 
 
 
 
 
 
 
36
 
37
  def display_bookmarks():
38
- # [Code from Step 5.3]
 
 
 
 
39
 
40
  def edit_bookmark(index, new_title, new_url):
41
- # [Code from Step 5.3]
 
 
 
 
 
 
 
 
 
 
 
42
 
43
  def delete_bookmark(index):
44
- # [Code from Step 5.3]
 
 
 
 
 
 
 
 
 
 
 
45
 
46
  def build_app():
47
- # [Code from Step 6]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
 
49
  if __name__ == "__main__":
50
  build_app()
 
1
  # app.py
2
+
3
  import gradio as gr
4
  from bs4 import BeautifulSoup
5
  import requests
 
15
  bookmarks = []
16
  fetch_cache = {}
17
 
18
+ # Helper functions
19
 
20
  def parse_bookmarks(file_content):
21
+ soup = BeautifulSoup(file_content, 'html.parser')
22
+ extracted_bookmarks = []
23
+ for link in soup.find_all('a'):
24
+ url = link.get('href')
25
+ title = link.text
26
+ if url and title:
27
+ extracted_bookmarks.append({'url': url, 'title': title})
28
+ return extracted_bookmarks
29
 
30
  def fetch_url_info(bookmark):
31
+ url = bookmark['url']
32
+ if url in fetch_cache:
33
+ bookmark.update(fetch_cache[url])
34
+ return bookmark
35
+
36
+ try:
37
+ response = requests.get(url, timeout=5)
38
+ bookmark['etag'] = response.headers.get('ETag', 'N/A')
39
+ bookmark['status_code'] = response.status_code
40
+
41
+ if response.status_code >= 400:
42
+ bookmark['dead_link'] = True
43
+ bookmark['content'] = ''
44
+ else:
45
+ bookmark['dead_link'] = False
46
+ soup = BeautifulSoup(response.content, 'html.parser')
47
+ meta_tags = {meta.get('name', ''): meta.get('content', '') for meta in soup.find_all('meta')}
48
+ bookmark['meta_tags'] = meta_tags
49
+ bookmark['content'] = soup.get_text(separator=' ', strip=True)
50
+ except Exception as e:
51
+ bookmark['dead_link'] = True
52
+ bookmark['etag'] = 'N/A'
53
+ bookmark['status_code'] = 'N/A'
54
+ bookmark['meta_tags'] = {}
55
+ bookmark['content'] = ''
56
+ finally:
57
+ fetch_cache[url] = {
58
+ 'etag': bookmark.get('etag'),
59
+ 'status_code': bookmark.get('status_code'),
60
+ 'dead_link': bookmark.get('dead_link'),
61
+ 'meta_tags': bookmark.get('meta_tags'),
62
+ 'content': bookmark.get('content'),
63
+ }
64
+ return bookmark
65
 
66
  def generate_summary(bookmark):
67
+ content = bookmark.get('content', '')
68
+ if content:
69
+ # Limit content to first 2000 characters to save resources
70
+ content = content[:2000]
71
+ summary = summarizer(content, max_length=50, min_length=25, do_sample=False)
72
+ bookmark['summary'] = summary[0]['summary_text']
73
+ else:
74
+ bookmark['summary'] = 'No content available to summarize.'
75
+ return bookmark
76
 
77
  def vectorize_and_index(bookmarks):
78
+ summaries = [bookmark['summary'] for bookmark in bookmarks]
79
+ embeddings = embedding_model.encode(summaries)
80
+ dimension = embeddings.shape[1]
81
+ faiss_index = faiss.IndexFlatL2(dimension)
82
+ faiss_index.add(np.array(embeddings))
83
+ return faiss_index, embeddings
84
 
85
  def process_uploaded_file(file):
86
+ global bookmarks, index
87
+ if file is None:
88
+ return "Please upload a bookmarks HTML file."
89
+
90
+ file_content = file.read().decode('utf-8')
91
+ bookmarks = parse_bookmarks(file_content)
92
+
93
+ for bookmark in bookmarks:
94
+ fetch_url_info(bookmark)
95
+ generate_summary(bookmark)
96
+
97
+ index, embeddings = vectorize_and_index(bookmarks)
98
+ return f"Successfully processed {len(bookmarks)} bookmarks."
99
 
100
  def chatbot_response(user_query):
101
+ if index is None or not bookmarks:
102
+ return "No bookmarks available. Please upload and process your bookmarks first."
103
+
104
+ # Vectorize user query
105
+ user_embedding = embedding_model.encode([user_query])
106
+ D, I = index.search(np.array(user_embedding), k=5) # Retrieve top 5 matches
107
+
108
+ # Generate response
109
+ response = ""
110
+ for idx in I[0]:
111
+ bookmark = bookmarks[idx]
112
+ response += f"Title: {bookmark['title']}\nURL: {bookmark['url']}\nSummary: {bookmark['summary']}\n\n"
113
+ return response.strip()
114
 
115
  def display_bookmarks():
116
+ bookmark_list = []
117
+ for i, bookmark in enumerate(bookmarks):
118
+ status = "Dead Link" if bookmark.get('dead_link') else "Active"
119
+ bookmark_list.append([i, bookmark['title'], bookmark['url'], status])
120
+ return bookmark_list
121
 
122
  def edit_bookmark(index, new_title, new_url):
123
+ global index # Reference the global index variable
124
+ try:
125
+ index = int(index)
126
+ bookmarks[index]['title'] = new_title
127
+ bookmarks[index]['url'] = new_url
128
+ fetch_url_info(bookmarks[index])
129
+ generate_summary(bookmarks[index])
130
+ # Rebuild the FAISS index
131
+ index, embeddings = vectorize_and_index(bookmarks)
132
+ return "Bookmark updated successfully."
133
+ except Exception as e:
134
+ return f"Error: {str(e)}"
135
 
136
  def delete_bookmark(index):
137
+ global index # Reference the global index variable
138
+ try:
139
+ index = int(index)
140
+ bookmarks.pop(index)
141
+ # Rebuild the FAISS index
142
+ if bookmarks:
143
+ index, embeddings = vectorize_and_index(bookmarks)
144
+ else:
145
+ index = None # No bookmarks left
146
+ return "Bookmark deleted successfully."
147
+ except Exception as e:
148
+ return f"Error: {str(e)}"
149
 
150
  def build_app():
151
+ with gr.Blocks() as demo:
152
+ gr.Markdown("# Bookmark Manager App")
153
+
154
+ with gr.Tab("Upload and Process Bookmarks"):
155
+ upload = gr.File(label="Upload Bookmarks HTML File")
156
+ process_button = gr.Button("Process Bookmarks")
157
+ output_text = gr.Textbox(label="Output")
158
+
159
+ process_button.click(
160
+ process_uploaded_file,
161
+ inputs=upload,
162
+ outputs=output_text
163
+ )
164
+
165
+ with gr.Tab("Chat with Bookmarks"):
166
+ user_input = gr.Textbox(label="Ask about your bookmarks")
167
+ chat_output = gr.Textbox(label="Chatbot Response")
168
+ chat_button = gr.Button("Send")
169
+
170
+ chat_button.click(
171
+ chatbot_response,
172
+ inputs=user_input,
173
+ outputs=chat_output
174
+ )
175
+
176
+ with gr.Tab("Manage Bookmarks"):
177
+ bookmark_table = gr.Dataframe(
178
+ headers=["Index", "Title", "URL", "Status"],
179
+ datatype=["number", "str", "str", "str"],
180
+ interactive=False
181
+ )
182
+ refresh_button = gr.Button("Refresh Bookmark List")
183
+
184
+ with gr.Row():
185
+ index_input = gr.Number(label="Bookmark Index")
186
+ new_title_input = gr.Textbox(label="New Title")
187
+ new_url_input = gr.Textbox(label="New URL")
188
+
189
+ edit_button = gr.Button("Edit Bookmark")
190
+ delete_button = gr.Button("Delete Bookmark")
191
+ manage_output = gr.Textbox(label="Manage Output")
192
+
193
+ refresh_button.click(
194
+ display_bookmarks,
195
+ inputs=None,
196
+ outputs=bookmark_table
197
+ )
198
+
199
+ edit_button.click(
200
+ edit_bookmark,
201
+ inputs=[index_input, new_title_input, new_url_input],
202
+ outputs=manage_output
203
+ )
204
+
205
+ delete_button.click(
206
+ delete_bookmark,
207
+ inputs=index_input,
208
+ outputs=manage_output
209
+ )
210
+
211
+ demo.launch()
212
 
213
  if __name__ == "__main__":
214
  build_app()