siddhartharya committed on
Commit
3276277
·
verified ·
1 Parent(s): e3f2905

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -144
app.py CHANGED
@@ -15,79 +15,27 @@ faiss_index = None # Renamed from 'index' to 'faiss_index'
15
  bookmarks = []
16
  fetch_cache = {}
17
 
18
- # Helper functions
19
 
20
  def parse_bookmarks(file_content):
21
- soup = BeautifulSoup(file_content, 'html.parser')
22
- extracted_bookmarks = []
23
- for link in soup.find_all('a'):
24
- url = link.get('href')
25
- title = link.text
26
- if url and title:
27
- extracted_bookmarks.append({'url': url, 'title': title})
28
- return extracted_bookmarks
29
 
30
  def fetch_url_info(bookmark):
31
- url = bookmark['url']
32
- if url in fetch_cache:
33
- bookmark.update(fetch_cache[url])
34
- return bookmark
35
-
36
- try:
37
- response = requests.get(url, timeout=5)
38
- bookmark['etag'] = response.headers.get('ETag', 'N/A')
39
- bookmark['status_code'] = response.status_code
40
-
41
- if response.status_code >= 400:
42
- bookmark['dead_link'] = True
43
- bookmark['content'] = ''
44
- else:
45
- bookmark['dead_link'] = False
46
- soup = BeautifulSoup(response.content, 'html.parser')
47
- meta_tags = {meta.get('name', ''): meta.get('content', '') for meta in soup.find_all('meta')}
48
- bookmark['meta_tags'] = meta_tags
49
- bookmark['content'] = soup.get_text(separator=' ', strip=True)
50
- except Exception as e:
51
- bookmark['dead_link'] = True
52
- bookmark['etag'] = 'N/A'
53
- bookmark['status_code'] = 'N/A'
54
- bookmark['meta_tags'] = {}
55
- bookmark['content'] = ''
56
- finally:
57
- fetch_cache[url] = {
58
- 'etag': bookmark.get('etag'),
59
- 'status_code': bookmark.get('status_code'),
60
- 'dead_link': bookmark.get('dead_link'),
61
- 'meta_tags': bookmark.get('meta_tags'),
62
- 'content': bookmark.get('content'),
63
- }
64
- return bookmark
65
 
66
  def generate_summary(bookmark):
67
- content = bookmark.get('content', '')
68
- if content:
69
- # Limit content to first 2000 characters to save resources
70
- content = content[:2000]
71
- summary = summarizer(content, max_length=50, min_length=25, do_sample=False)
72
- bookmark['summary'] = summary[0]['summary_text']
73
- else:
74
- bookmark['summary'] = 'No content available to summarize.'
75
- return bookmark
76
 
77
  def vectorize_and_index(bookmarks):
78
- summaries = [bookmark['summary'] for bookmark in bookmarks]
79
- embeddings = embedding_model.encode(summaries)
80
- dimension = embeddings.shape[1]
81
- faiss_idx = faiss.IndexFlatL2(dimension)
82
- faiss_idx.add(np.array(embeddings))
83
- return faiss_idx, embeddings
84
 
85
  def process_uploaded_file(file):
86
  global bookmarks, faiss_index
87
  if file is None:
88
  return "Please upload a bookmarks HTML file."
89
 
90
- file_content = file.read().decode('utf-8')
 
91
  bookmarks = parse_bookmarks(file_content)
92
 
93
  for bookmark in bookmarks:
@@ -98,61 +46,23 @@ def process_uploaded_file(file):
98
  return f"Successfully processed {len(bookmarks)} bookmarks."
99
 
100
  def chatbot_response(user_query):
101
- if faiss_index is None or not bookmarks:
102
- return "No bookmarks available. Please upload and process your bookmarks first."
103
-
104
- # Vectorize user query
105
- user_embedding = embedding_model.encode([user_query])
106
- D, I = faiss_index.search(np.array(user_embedding), k=5) # Retrieve top 5 matches
107
-
108
- # Generate response
109
- response = ""
110
- for idx in I[0]:
111
- bookmark = bookmarks[idx]
112
- response += f"Title: {bookmark['title']}\nURL: {bookmark['url']}\nSummary: {bookmark['summary']}\n\n"
113
- return response.strip()
114
 
115
  def display_bookmarks():
116
- bookmark_list = []
117
- for i, bookmark in enumerate(bookmarks):
118
- status = "Dead Link" if bookmark.get('dead_link') else "Active"
119
- bookmark_list.append([i, bookmark['title'], bookmark['url'], status])
120
- return bookmark_list
121
 
122
  def edit_bookmark(bookmark_idx, new_title, new_url):
123
- global faiss_index # Reference the global faiss_index variable
124
- try:
125
- bookmark_idx = int(bookmark_idx)
126
- bookmarks[bookmark_idx]['title'] = new_title
127
- bookmarks[bookmark_idx]['url'] = new_url
128
- fetch_url_info(bookmarks[bookmark_idx])
129
- generate_summary(bookmarks[bookmark_idx])
130
- # Rebuild the FAISS index
131
- faiss_index, embeddings = vectorize_and_index(bookmarks)
132
- return "Bookmark updated successfully."
133
- except Exception as e:
134
- return f"Error: {str(e)}"
135
 
136
  def delete_bookmark(bookmark_idx):
137
- global faiss_index # Reference the global faiss_index variable
138
- try:
139
- bookmark_idx = int(bookmark_idx)
140
- bookmarks.pop(bookmark_idx)
141
- # Rebuild the FAISS index
142
- if bookmarks:
143
- faiss_index, embeddings = vectorize_and_index(bookmarks)
144
- else:
145
- faiss_index = None # No bookmarks left
146
- return "Bookmark deleted successfully."
147
- except Exception as e:
148
- return f"Error: {str(e)}"
149
 
150
  def build_app():
151
  with gr.Blocks() as demo:
152
  gr.Markdown("# Bookmark Manager App")
153
 
154
  with gr.Tab("Upload and Process Bookmarks"):
155
- upload = gr.File(label="Upload Bookmarks HTML File")
156
  process_button = gr.Button("Process Bookmarks")
157
  output_text = gr.Textbox(label="Output")
158
 
@@ -163,50 +73,10 @@ def build_app():
163
  )
164
 
165
  with gr.Tab("Chat with Bookmarks"):
166
- user_input = gr.Textbox(label="Ask about your bookmarks")
167
- chat_output = gr.Textbox(label="Chatbot Response")
168
- chat_button = gr.Button("Send")
169
-
170
- chat_button.click(
171
- chatbot_response,
172
- inputs=user_input,
173
- outputs=chat_output
174
- )
175
 
176
  with gr.Tab("Manage Bookmarks"):
177
- bookmark_table = gr.Dataframe(
178
- headers=["Index", "Title", "URL", "Status"],
179
- datatype=["number", "str", "str", "str"],
180
- interactive=False
181
- )
182
- refresh_button = gr.Button("Refresh Bookmark List")
183
-
184
- with gr.Row():
185
- index_input = gr.Number(label="Bookmark Index")
186
- new_title_input = gr.Textbox(label="New Title")
187
- new_url_input = gr.Textbox(label="New URL")
188
-
189
- edit_button = gr.Button("Edit Bookmark")
190
- delete_button = gr.Button("Delete Bookmark")
191
- manage_output = gr.Textbox(label="Manage Output")
192
-
193
- refresh_button.click(
194
- display_bookmarks,
195
- inputs=None,
196
- outputs=bookmark_table
197
- )
198
-
199
- edit_button.click(
200
- edit_bookmark,
201
- inputs=[index_input, new_title_input, new_url_input],
202
- outputs=manage_output
203
- )
204
-
205
- delete_button.click(
206
- delete_bookmark,
207
- inputs=index_input,
208
- outputs=manage_output
209
- )
210
 
211
  demo.launch()
212
 
 
15
  bookmarks = []
16
  fetch_cache = {}
17
 
18
+ # Helper functions remain the same...
19
 
20
  def parse_bookmarks(file_content):
21
+ # [Same as before]
 
 
 
 
 
 
 
22
 
23
  def fetch_url_info(bookmark):
24
+ # [Same as before]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
  def generate_summary(bookmark):
27
+ # [Same as before]
 
 
 
 
 
 
 
 
28
 
29
  def vectorize_and_index(bookmarks):
30
+ # [Same as before]
 
 
 
 
 
31
 
32
  def process_uploaded_file(file):
33
  global bookmarks, faiss_index
34
  if file is None:
35
  return "Please upload a bookmarks HTML file."
36
 
37
+ # Since 'file' is now bytes, decode it directly
38
+ file_content = file.decode('utf-8')
39
  bookmarks = parse_bookmarks(file_content)
40
 
41
  for bookmark in bookmarks:
 
46
  return f"Successfully processed {len(bookmarks)} bookmarks."
47
 
48
  def chatbot_response(user_query):
49
+ # [Same as before]
 
 
 
 
 
 
 
 
 
 
 
 
50
 
51
  def display_bookmarks():
52
+ # [Same as before]
 
 
 
 
53
 
54
  def edit_bookmark(bookmark_idx, new_title, new_url):
55
+ # [Same as before]
 
 
 
 
 
 
 
 
 
 
 
56
 
57
  def delete_bookmark(bookmark_idx):
58
+ # [Same as before]
 
 
 
 
 
 
 
 
 
 
 
59
 
60
  def build_app():
61
  with gr.Blocks() as demo:
62
  gr.Markdown("# Bookmark Manager App")
63
 
64
  with gr.Tab("Upload and Process Bookmarks"):
65
+ upload = gr.File(label="Upload Bookmarks HTML File", type='bytes') # Updated here
66
  process_button = gr.Button("Process Bookmarks")
67
  output_text = gr.Textbox(label="Output")
68
 
 
73
  )
74
 
75
  with gr.Tab("Chat with Bookmarks"):
76
+ # [Same as before]
 
 
 
 
 
 
 
 
77
 
78
  with gr.Tab("Manage Bookmarks"):
79
+ # [Same as before]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
 
81
  demo.launch()
82