Spaces:

siddhartharya
/

Bookmark-Manager

Running

App Files Community

siddhartharya committed on Nov 26, 2024

Commit

eb485e7

verified ·

1 Parent(s): 31c955d

Update app.py

Browse files

Files changed (1) hide show

app.py +33 -75

app.py CHANGED Viewed

@@ -265,21 +265,23 @@ def llm_worker():
                 tpm_bucket.wait_for_token(tokens=total_tokens)
                 # Prepare prompt
-                prompt = "You are an assistant that creates concise webpage summaries and assigns categories.\n\n"
-                prompt += "Provide summaries and categories for the following bookmarks:\n\n"
                 for idx, bookmark in enumerate(batch, 1):
-                    prompt += f"Bookmark {idx}:\nURL: {bookmark['url']}\nTitle: {bookmark['title']}\n\n"
                 # Corrected f-string without backslashes
-                prompt += f"Categories:\n{', '.join([f'\"{cat}\"' for cat in CATEGORIES])}\n\n"
                 prompt += "Format your response as a JSON object where each key is the bookmark URL and the value is another JSON object containing 'summary' and 'category'.\n\n"
                 prompt += "Example:\n"
                 prompt += "{\n"
-                prompt += "  \"https://example.com\": {\n"
-                prompt += "    \"summary\": \"This is an example summary.\",\n"
-                prompt += "    \"category\": \"Technology\"\n"
                 prompt += "  }\n"
                 prompt += "}\n\n"
                 prompt += "Now, provide the summaries and categories for the bookmarks listed above."
@@ -338,7 +340,7 @@ def llm_worker():
                         bookmark['summary'] = 'No summary available.'
                         bookmark['category'] = 'Uncategorized'
-            except openai.error.RateLimitError as e:
                 logger.warning(f"LLM Rate limit reached. Retrying after 60 seconds.")
                 # Re-enqueue the entire batch for retry
                 for bookmark in batch:
@@ -357,41 +359,27 @@ def llm_worker():
                 for _ in batch:
                     llm_queue.task_done()
-def categorize_based_on_summary(summary, url):
-    """
-    Assign category based on keywords in the summary or URL.
-    """
-    summary_lower = summary.lower()
-    url_lower = url.lower()
-    if 'social media' in summary_lower or 'twitter' in summary_lower or 'x.com' in url_lower:
-        return 'Social Media'
-    elif 'wikipedia' in url_lower:
-        return 'Reference and Knowledge Bases'
-    elif 'cloud computing' in summary_lower or 'aws' in summary_lower:
-        return 'Technology'
-    elif 'news' in summary_lower or 'media' in summary_lower:
-        return 'News and Media'
-    elif 'education' in summary_lower or 'learning' in summary_lower:
-        return 'Education and Learning'
-    # Add more conditions as needed
-    else:
-        return 'Uncategorized'
-def validate_category(bookmark):
     """
-    Further validate and adjust the category if needed.
     """
-    # Example: Specific cases based on URL
-    url_lower = bookmark['url'].lower()
-    if 'facebook' in url_lower or 'x.com' in url_lower:
-        return 'Social Media'
-    elif 'wikipedia' in url_lower:
-        return 'Reference and Knowledge Bases'
-    elif 'aws.amazon.com' in url_lower:
-        return 'Technology'
-    # Add more specific cases as needed
-    else:
-        return bookmark['category']
 def fetch_url_info(bookmark):
     """
@@ -453,28 +441,6 @@ def fetch_url_info(bookmark):
                 'slow_link': bookmark.get('slow_link', False),
             }
-def parse_bookmarks(file_content):
-    """
-    Parse bookmarks from HTML file.
-    """
-    logger.info("Parsing bookmarks")
-    try:
-        soup = BeautifulSoup(file_content, 'html.parser')
-        extracted_bookmarks = []
-        for link in soup.find_all('a'):
-            url = link.get('href')
-            title = link.text.strip()
-            if url and title:
-                if url.startswith('http://') or url.startswith('https://'):
-                    extracted_bookmarks.append({'url': url, 'title': title})
-                else:
-                    logger.info(f"Skipping non-http/https URL: {url}")
-        logger.info(f"Extracted {len(extracted_bookmarks)} bookmarks")
-        return extracted_bookmarks
-    except Exception as e:
-        logger.error("Error parsing bookmarks: %s", e, exc_info=True)
-        raise
 def vectorize_and_index(bookmarks_list):
     """
     Create vector embeddings for bookmarks and build FAISS index with ID mapping.
@@ -546,14 +512,6 @@ def display_bookmarks():
     logger.info("HTML display generated")
     return cards
-def generate_summary_and_assign_category(bookmark):
-    """
-    Generate a concise summary and assign a category using a single LLM call.
-    This function is now handled by the LLM worker thread.
-    """
-    # This function is now deprecated and handled by the worker thread.
-    pass
 def process_uploaded_file(file, state_bookmarks):
     """
     Process the uploaded bookmarks file.
@@ -749,13 +707,13 @@ def chatbot_response(user_query, chat_history):
             for bookmark in matching_bookmarks
         ])
-        prompt = f"""
 A user asked: "{user_query}"
 Based on the bookmarks below, provide a helpful answer to the user's query, referencing the relevant bookmarks.
 Bookmarks:
 {bookmarks_info}
 Provide a concise and helpful response.
-"""
         response = openai.ChatCompletion.create(
             model='llama-3.1-70b-versatile',  # Ensure this model is correct and available
@@ -772,8 +730,8 @@ Provide a concise and helpful response.
         chat_history.append({"role": "assistant", "content": answer})
         return chat_history
-    except openai.error.RateLimitError as e:
-        wait_time = int(e.headers.get("Retry-After", 5))
         logger.warning(f"Rate limit reached. Waiting for {wait_time} seconds before retrying...")
         time.sleep(wait_time)
         return chatbot_response(user_query, chat_history)

                 tpm_bucket.wait_for_token(tokens=total_tokens)
                 # Prepare prompt
+                prompt = f'''
+You are an assistant that creates concise webpage summaries and assigns categories.
+Provide summaries and categories for the following bookmarks:
+'''
                 for idx, bookmark in enumerate(batch, 1):
+                    prompt += f'Bookmark {idx}:\nURL: {bookmark["url"]}\nTitle: {bookmark["title"]}\n\n'
                 # Corrected f-string without backslashes
+                prompt += f'Categories:\n{", ".join([f\'"{cat}"\' for cat in CATEGORIES])}\n\n'
                 prompt += "Format your response as a JSON object where each key is the bookmark URL and the value is another JSON object containing 'summary' and 'category'.\n\n"
                 prompt += "Example:\n"
                 prompt += "{\n"
+                prompt += '  "https://example.com": {\n'
+                prompt += '    "summary": "This is an example summary.",\n'
+                prompt += '    "category": "Technology"\n'
                 prompt += "  }\n"
                 prompt += "}\n\n"
                 prompt += "Now, provide the summaries and categories for the bookmarks listed above."
                         bookmark['summary'] = 'No summary available.'
                         bookmark['category'] = 'Uncategorized'
+            except openai.error.RateLimitError:
                 logger.warning(f"LLM Rate limit reached. Retrying after 60 seconds.")
                 # Re-enqueue the entire batch for retry
                 for bookmark in batch:
                 for _ in batch:
                     llm_queue.task_done()
+def parse_bookmarks(file_content):
     """
+    Parse bookmarks from HTML file.
     """
+    logger.info("Parsing bookmarks")
+    try:
+        soup = BeautifulSoup(file_content, 'html.parser')
+        extracted_bookmarks = []
+        for link in soup.find_all('a'):
+            url = link.get('href')
+            title = link.text.strip()
+            if url and title:
+                if url.startswith('http://') or url.startswith('https://'):
+                    extracted_bookmarks.append({'url': url, 'title': title})
+                else:
+                    logger.info(f"Skipping non-http/https URL: {url}")
+        logger.info(f"Extracted {len(extracted_bookmarks)} bookmarks")
+        return extracted_bookmarks
+    except Exception as e:
+        logger.error("Error parsing bookmarks: %s", e, exc_info=True)
+        raise
 def fetch_url_info(bookmark):
     """
                 'slow_link': bookmark.get('slow_link', False),
             }
 def vectorize_and_index(bookmarks_list):
     """
     Create vector embeddings for bookmarks and build FAISS index with ID mapping.
     logger.info("HTML display generated")
     return cards
 def process_uploaded_file(file, state_bookmarks):
     """
     Process the uploaded bookmarks file.
             for bookmark in matching_bookmarks
         ])
+        prompt = f'''
 A user asked: "{user_query}"
 Based on the bookmarks below, provide a helpful answer to the user's query, referencing the relevant bookmarks.
 Bookmarks:
 {bookmarks_info}
 Provide a concise and helpful response.
+'''
         response = openai.ChatCompletion.create(
             model='llama-3.1-70b-versatile',  # Ensure this model is correct and available
         chat_history.append({"role": "assistant", "content": answer})
         return chat_history
+    except openai.error.RateLimitError:
+        wait_time = int(60)  # Wait time can be adjusted or extracted from headers if available
         logger.warning(f"Rate limit reached. Waiting for {wait_time} seconds before retrying...")
         time.sleep(wait_time)
         return chatbot_response(user_query, chat_history)