Spaces:

siddhartharya
/

Bookmark-Manager

Running

App Files Community

siddhartharya committed on Nov 26, 2024

Commit

70eb2ff

verified ·

1 Parent(s): ab5c457

Update app.py

Browse files

Files changed (1) hide show

app.py +205 -204

app.py CHANGED Viewed

@@ -8,14 +8,18 @@ import numpy as np
 import requests
 import time
 import re
 import logging
 import os
 import sys
 from concurrent.futures import ThreadPoolExecutor
 import threading
-from html import escape
-# Suppress specific warnings
 import urllib3
 urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
@@ -43,10 +47,6 @@ fetch_cache = {}
 # Lock for thread-safe operations
 lock = threading.Lock()
-api_lock = threading.Lock()  # Added api_lock
-# Initialize last_api_call_time
-last_api_call_time = 0  # Added initialization
 # Define the categories
 CATEGORIES = [
@@ -74,41 +74,18 @@ CATEGORIES = [
     "Uncategorized",
 ]
-# Define a function to generate responses using llama-3.1-70b-versatile
-def generate_llama_response(prompt):
-    """
-    Generate a response using the llama-3.1-70b-versatile model.
-    This implementation assumes that the model is accessible via a local HTTP API endpoint.
-    Replace the URL and request parameters as per your actual setup.
-    """
-    try:
-        logger.info("Generating response using llama-3.1-70b-versatile")
-        api_url = "http://localhost:5000/generate"  # Replace with your actual endpoint
-        headers = {
-            'Content-Type': 'application/json',
-        }
-        payload = {
-            'prompt': prompt,
-            'max_tokens': 500,  # Adjust as needed
-            'temperature': 0.7,  # Adjust as needed
-        }
-        response = requests.post(api_url, json=payload, headers=headers, timeout=30)
-        response.raise_for_status()  # Raise an exception for HTTP errors
-        data = response.json()
-        generated_text = data.get('response', '').strip()
-        if not generated_text:
-            raise ValueError("Empty response received from the model.")
-        return generated_text
-    except requests.exceptions.RequestException as e:
-        logger.error(f"HTTP Request failed: {e}", exc_info=True)
-        return "Error generating response due to HTTP request failure."
-    except ValueError as ve:
-        logger.error(f"Value Error: {ve}", exc_info=True)
-        return "Error generating response: Received empty response from the model."
-    except Exception as e:
-        logger.error(f"Unexpected error: {e}", exc_info=True)
-        return "An unexpected error occurred while generating the response."
 def extract_main_content(soup):
     """
@@ -130,7 +107,7 @@ def extract_main_content(soup):
         content = soup.get_text(separator=' ', strip=True)
     # Clean up the text
-    content = re.sub(r'\s+', ' ', content)  # Remove multiple spaces
     # Truncate content to a reasonable length (e.g., 1500 words)
     words = content.split()
@@ -181,55 +158,57 @@ def get_page_metadata(soup):
 def generate_summary_and_assign_category(bookmark):
     """
-    Generate a concise summary and assign a category using the llama-3.1-70b-versatile model.
     """
     logger.info(f"Generating summary and assigning category for bookmark: {bookmark.get('url')}")
-    try:
-        # Rate Limiting Logic
-        with api_lock:
-            global last_api_call_time
-            current_time = time.time()
-            elapsed = current_time - last_api_call_time
-            if elapsed < 2:
-                sleep_duration = 2 - elapsed
-                logger.info(f"Sleeping for {sleep_duration:.2f} seconds to respect rate limits.")
-                time.sleep(sleep_duration)
-            last_api_call_time = current_time
-        # Prepare the prompt
-        html_content = bookmark.get('html_content', '')
-        soup = BeautifulSoup(html_content, 'html.parser')
-        metadata = get_page_metadata(soup)
-        main_content = extract_main_content(soup)
-        # Prepare content for the prompt
-        content_parts = []
-        if metadata['title']:
-            content_parts.append(f"Title: {metadata['title']}")
-        if metadata['description']:
-            content_parts.append(f"Description: {metadata['description']}")
-        if metadata['keywords']:
-            content_parts.append(f"Keywords: {metadata['keywords']}")
-        if main_content:
-            content_parts.append(f"Main Content: {main_content}")
-        content_text = '\n'.join(content_parts)
-        # Detect insufficient or erroneous content
-        error_keywords = ['Access Denied', 'Security Check', 'Cloudflare', 'captcha', 'unusual traffic']
-        if not content_text or len(content_text.split()) < 50:
-            use_prior_knowledge = True
-            logger.info(f"Content for {bookmark.get('url')} is insufficient. Instructing LLM to use prior knowledge.")
-        elif any(keyword.lower() in content_text.lower() for keyword in error_keywords):
-            use_prior_knowledge = True
-            logger.info(f"Content for {bookmark.get('url')} contains error messages. Instructing LLM to use prior knowledge.")
-        else:
-            use_prior_knowledge = False
-        # Craft the prompt based on content availability
-        if use_prior_knowledge:
-            prompt = f"""
 You are a knowledgeable assistant with up-to-date information as of 2023.
 URL: {bookmark.get('url')}
 Provide:
@@ -241,8 +220,8 @@ Format:
 Summary: [Your summary]
 Category: [One category]
 """
-        else:
-            prompt = f"""
 You are an assistant that creates concise webpage summaries and assigns categories.
 Content:
 {content_text}
@@ -256,44 +235,70 @@ Summary: [Your summary]
 Category: [One category]
 """
-        # Generate response using llama-3.1-70b-versatile
-        response = generate_llama_response(prompt)
-        if not response:
-            raise ValueError("Empty response received from the model.")
-        # Parse the response
-        summary_match = re.search(r"Summary:\s*(.*)", response)
-        category_match = re.search(r"Category:\s*(.*)", response)
-        if summary_match:
-            bookmark['summary'] = summary_match.group(1).strip()
-        else:
-            bookmark['summary'] = 'No summary available.'
-        if category_match:
-            category = category_match.group(1).strip().strip('"')
-            if category in CATEGORIES:
-                bookmark['category'] = category
             else:
                 bookmark['category'] = 'Uncategorized'
-        else:
-            bookmark['category'] = 'Uncategorized'
-        # Optional: Simple keyword-based validation
-        summary_lower = bookmark['summary'].lower()
-        url_lower = bookmark['url'].lower()
-        if 'social media' in summary_lower or 'twitter' in summary_lower or 'x.com' in url_lower:
-            bookmark['category'] = 'Social Media'
-        elif 'wikipedia' in url_lower:
-            bookmark['category'] = 'Reference and Knowledge Bases'
-        logger.info("Successfully generated summary and assigned category")
-    except Exception as e:
-        logger.error(f"Error generating summary and assigning category: {e}", exc_info=True)
-        bookmark['summary'] = 'No summary available.'
-        bookmark['category'] = 'Uncategorized'
 def parse_bookmarks(file_content):
     """
@@ -340,9 +345,7 @@ def fetch_url_info(bookmark):
         content = response.text
         logger.info(f"Fetched content length for {url}: {len(content)} characters")
-        # Handle status codes
         if response.status_code >= 500:
-            # Server error, consider as dead link
             bookmark['dead_link'] = True
             bookmark['description'] = ''
             bookmark['html_content'] = ''
@@ -354,12 +357,12 @@ def fetch_url_info(bookmark):
             logger.info(f"Fetched information for {url}")
     except requests.exceptions.Timeout:
-        bookmark['dead_link'] = False  # Mark as 'Unknown' instead of 'Dead'
         bookmark['etag'] = 'N/A'
         bookmark['status_code'] = 'Timeout'
         bookmark['description'] = ''
         bookmark['html_content'] = ''
-        bookmark['slow_link'] = True  # Custom flag to indicate slow response
         logger.warning(f"Timeout while fetching {url}. Marking as 'Slow'.")
     except Exception as e:
         bookmark['dead_link'] = True
@@ -390,7 +393,6 @@ def vectorize_and_index(bookmarks_list):
         embeddings = embedding_model.encode(summaries)
         dimension = embeddings.shape[1]
         index = faiss.IndexIDMap(faiss.IndexFlatL2(dimension))
-        # Assign unique IDs to each bookmark
         ids = np.array([bookmark['id'] for bookmark in bookmarks_list], dtype=np.int64)
         index.add_with_ids(np.array(embeddings).astype('float32'), ids)
         faiss_index = index
@@ -411,15 +413,15 @@ def display_bookmarks():
         if bookmark.get('dead_link'):
             status = "❌ Dead Link"
             card_style = "border: 2px solid red;"
-            text_style = "color: white;"  # Set font color to white
         elif bookmark.get('slow_link'):
-            status = "⏳ Slow Response"
             card_style = "border: 2px solid orange;"
-            text_style = "color: white;"  # Set font color to white
         else:
             status = "✅ Active"
             card_style = "border: 2px solid green;"
-            text_style = "color: white;"  # Set font color to white
         title = bookmark['title']
         url = bookmark['url']
@@ -428,6 +430,7 @@ def display_bookmarks():
         category = bookmark.get('category', 'Uncategorized')
         # Escape HTML content to prevent XSS attacks
         title = escape(title)
         url = escape(url)
         summary = escape(summary)
@@ -457,23 +460,23 @@ def process_uploaded_file(file, state_bookmarks):
     if file is None:
         logger.warning("No file uploaded")
-        return "Please upload a bookmarks HTML file.", '', state_bookmarks, gr.update(choices=[])
     try:
         file_content = file.decode('utf-8')
     except UnicodeDecodeError as e:
         logger.error(f"Error decoding the file: {e}", exc_info=True)
-        return "Error decoding the file. Please ensure it's a valid HTML file.", '', state_bookmarks, gr.update(choices=[])
     try:
         bookmarks = parse_bookmarks(file_content)
     except Exception as e:
         logger.error(f"Error parsing bookmarks: {e}", exc_info=True)
-        return "Error parsing the bookmarks HTML file.", '', state_bookmarks, gr.update(choices=[])
     if not bookmarks:
         logger.warning("No bookmarks found in the uploaded file")
-        return "No bookmarks found in the uploaded file.", '', state_bookmarks, gr.update(choices=[])
     # Assign unique IDs to bookmarks
     for idx, bookmark in enumerate(bookmarks):
@@ -481,19 +484,19 @@ def process_uploaded_file(file, state_bookmarks):
     # Fetch bookmark info concurrently
     logger.info("Fetching URL info concurrently")
-    with ThreadPoolExecutor(max_workers=10) as executor:  # Adjust max_workers as needed
         executor.map(fetch_url_info, bookmarks)
     # Process bookmarks concurrently with LLM calls
     logger.info("Processing bookmarks with LLM concurrently")
-    with ThreadPoolExecutor(max_workers=1) as executor:  # Serialize API calls to respect rate limits
         executor.map(generate_summary_and_assign_category, bookmarks)
     try:
         faiss_index = vectorize_and_index(bookmarks)
     except Exception as e:
         logger.error(f"Error building FAISS index: {e}", exc_info=True)
-        return "Error building search index.", '', state_bookmarks, gr.update(choices=[])
     message = f"✅ Successfully processed {len(bookmarks)} bookmarks."
     logger.info(message)
@@ -506,7 +509,7 @@ def process_uploaded_file(file, state_bookmarks):
     # Update state
     state_bookmarks = bookmarks.copy()
-    return message, bookmark_html, state_bookmarks, gr.update(choices=choices)
 def delete_selected_bookmarks(selected_indices, state_bookmarks):
     """
@@ -519,15 +522,12 @@ def delete_selected_bookmarks(selected_indices, state_bookmarks):
     ids_to_delete = []
     indices_to_delete = []
     for s in selected_indices:
-        try:
-            idx = int(s.split('.')[0]) - 1
-            if 0 <= idx < len(bookmarks):
-                bookmark_id = bookmarks[idx]['id']
-                ids_to_delete.append(bookmark_id)
-                indices_to_delete.append(idx)
-                logger.info(f"Deleting bookmark at index {idx + 1}")
-        except (ValueError, IndexError):
-            logger.warning(f"Invalid selection format: {s}")
     # Remove vectors from FAISS index
     if faiss_index is not None and ids_to_delete:
@@ -556,20 +556,11 @@ def edit_selected_bookmarks_category(selected_indices, new_category, state_bookm
     if not new_category:
         return "⚠️ No new category selected.", gr.update(choices=[]), display_bookmarks(), state_bookmarks
-    indices = []
-    for s in selected_indices:
-        try:
-            idx = int(s.split('.')[0])-1
-            if 0 <= idx < len(bookmarks):
-                indices.append(idx)
-            else:
-                logger.warning(f"Index out of range: {idx + 1}")
-        except ValueError:
-            logger.warning(f"Invalid selection format: {s}")
     for idx in indices:
-        bookmarks[idx]['category'] = new_category
-        logger.info(f"Updated category for bookmark {idx + 1} to {new_category}")
     message = "✏️ Category updated for selected bookmarks."
     logger.info(message)
@@ -589,7 +580,7 @@ def export_bookmarks():
     """
     if not bookmarks:
         logger.warning("No bookmarks to export")
-        return None  # Return None to indicate no file
     try:
         logger.info("Exporting bookmarks to HTML")
@@ -603,19 +594,18 @@ def export_bookmarks():
             dl.append(dt)
         soup.append(dl)
         html_content = str(soup)
-        # Save to a temporary file
         output_file = "exported_bookmarks.html"
         with open(output_file, 'w', encoding='utf-8') as f:
             f.write(html_content)
         logger.info("Bookmarks exported successfully")
-        return output_file  # Return the file path
     except Exception as e:
         logger.error(f"Error exporting bookmarks: {e}", exc_info=True)
-        return None  # Return None in case of error
 def chatbot_response(user_query, chat_history):
     """
-    Generate chatbot response using the FAISS index and embeddings, maintaining chat history.
     """
     if not bookmarks or faiss_index is None:
         logger.warning("No bookmarks available for chatbot")
@@ -625,10 +615,8 @@ def chatbot_response(user_query, chat_history):
     logger.info(f"Chatbot received query: {user_query}")
     try:
-        # Append user's message to chat history
         chat_history.append({"role": "user", "content": user_query})
-        # Rate Limiting Logic (if necessary)
         with api_lock:
             global last_api_call_time
             current_time = time.time()
@@ -637,15 +625,13 @@ def chatbot_response(user_query, chat_history):
                 sleep_duration = 2 - elapsed
                 logger.info(f"Sleeping for {sleep_duration:.2f} seconds to respect rate limits.")
                 time.sleep(sleep_duration)
-            last_api_call_time = current_time
-        # Encode the query and search the FAISS index
         query_vector = embedding_model.encode([user_query]).astype('float32')
-        k = 5  # Number of results to return
         distances, ids = faiss_index.search(query_vector, k)
         ids = ids.flatten()
-        # Retrieve the bookmarks
         id_to_bookmark = {bookmark['id']: bookmark for bookmark in bookmarks}
         matching_bookmarks = [id_to_bookmark.get(id) for id in ids if id in id_to_bookmark]
@@ -654,13 +640,11 @@ def chatbot_response(user_query, chat_history):
             chat_history.append({"role": "assistant", "content": answer})
             return chat_history
-        # Format the response
         bookmarks_info = "\n".join([
             f"Title: {bookmark['title']}\nURL: {bookmark['url']}\nSummary: {bookmark['summary']}"
             for bookmark in matching_bookmarks
         ])
-        # Craft the prompt for the LLM
         prompt = f"""
 A user asked: "{user_query}"
 Based on the bookmarks below, provide a helpful answer to the user's query, referencing the relevant bookmarks.
@@ -669,19 +653,39 @@ Bookmarks:
 Provide a concise and helpful response.
 """
-        # Generate response using llama-3.1-70b-versatile
-        response = generate_llama_response(prompt)
-        if not response:
-            raise ValueError("Empty response received from the model.")
-        answer = response.strip()
         logger.info("Chatbot response generated")
-        # Append the assistant's response to chat history
         chat_history.append({"role": "assistant", "content": answer})
         return chat_history
     except Exception as e:
         error_message = f"⚠️ Error processing your query: {str(e)}"
         logger.error(error_message, exc_info=True)
@@ -698,12 +702,6 @@ def build_app():
             # Initialize state
             state_bookmarks = gr.State([])
-            # Define 'bookmark_selector' globally
-            bookmark_selector = gr.CheckboxGroup(
-                label="✅ Select Bookmarks",
-                choices=[]
-            )
             # General Overview
             gr.Markdown("""
 # 📚 SmartMarks - AI Browser Bookmarks Manager
@@ -723,7 +721,7 @@ SmartMarks is divided into three main sections:
 Navigate through the tabs to explore each feature in detail.
 """)
-            # Define tabs
             with gr.Tab("Upload and Process Bookmarks"):
                 gr.Markdown("""
 ## 📂 **Upload and Process Bookmarks**
@@ -741,17 +739,13 @@ Navigate through the tabs to explore each feature in detail.
 3. **View Processed Bookmarks:**
    - Once processing is complete, your bookmarks will be displayed in an organized and visually appealing format below.
 """)
                 upload = gr.File(label="📁 Upload Bookmarks HTML File", type='binary')
                 process_button = gr.Button("⚙️ Process Bookmarks")
                 output_text = gr.Textbox(label="✅ Output", interactive=False)
                 bookmark_display = gr.HTML(label="📄 Processed Bookmarks")
-                process_button.click(
-                    process_uploaded_file,
-                    inputs=[upload, state_bookmarks],
-                    outputs=[output_text, bookmark_display, state_bookmarks, bookmark_selector]
-                )
             with gr.Tab("Chat with Bookmarks"):
                 gr.Markdown("""
 ## 💬 **Chat with Bookmarks**
@@ -770,6 +764,7 @@ Navigate through the tabs to explore each feature in detail.
 4. **View Chat History:**
    - All your queries and the corresponding AI responses are displayed in the chat history for your reference.
 """)
                 chatbot = gr.Chatbot(label="💬 Chat with SmartMarks", type='messages')
                 user_input = gr.Textbox(
                     label="✍️ Ask about your bookmarks",
@@ -783,10 +778,10 @@ Navigate through the tabs to explore each feature in detail.
                     outputs=chatbot
                 )
             with gr.Tab("Manage Bookmarks"):
                 gr.Markdown("""
-## 🛠️ **Manage Bookmarks**
 ### 🗂️ **Features:**
 1. **View Bookmarks:**
@@ -810,7 +805,15 @@ Navigate through the tabs to explore each feature in detail.
 6. **Refresh Bookmarks:**
    - Click the **"🔄 Refresh Bookmarks"** button to ensure the latest state is reflected in the display.
 """)
                 manage_output = gr.Textbox(label="🔄 Status", interactive=False)
                 new_category = gr.Dropdown(
                     label="🆕 New Category",
                     choices=CATEGORIES,
@@ -818,11 +821,6 @@ Navigate through the tabs to explore each feature in detail.
                 )
                 bookmark_display_manage = gr.HTML(label="📄 Bookmarks")
-                with gr.Row():
-                    # Include 'bookmark_selector' within the tab
-                    # It is defined globally and will be displayed only in this tab via CSS
-                    bookmark_selector
                 with gr.Row():
                     delete_button = gr.Button("🗑️ Delete Selected")
                     edit_category_button = gr.Button("✏️ Edit Category")
@@ -831,7 +829,13 @@ Navigate through the tabs to explore each feature in detail.
                 download_link = gr.File(label="📥 Download Exported Bookmarks")
-                # Define button actions
                 delete_button.click(
                     delete_selected_bookmarks,
                     inputs=[bookmark_selector, state_bookmarks],
@@ -852,7 +856,8 @@ Navigate through the tabs to explore each feature in detail.
                 refresh_button.click(
                     lambda state_bookmarks: (
                         [
-                            f"{i+1}. {bookmark['title']} (Category: {bookmark['category']})" for i, bookmark in enumerate(state_bookmarks)
                         ],
                         display_bookmarks()
                     ),
@@ -862,12 +867,8 @@ Navigate through the tabs to explore each feature in detail.
         logger.info("Launching Gradio app")
         demo.launch(debug=True)
-    except gr.Error as e:
-        logger.error(f"Gradio Error: {e}", exc_info=True)
-        print(f"Gradio Error: {e}")
     except Exception as e:
         logger.error(f"Error building the app: {e}", exc_info=True)
         print(f"Error building the app: {e}")
-if __name__ == "__main__":
-    build_app()

 import requests
 import time
 import re
+import base64
 import logging
 import os
 import sys
+import concurrent.futures
 from concurrent.futures import ThreadPoolExecutor
 import threading
+# Import OpenAI library
+import openai
+# Suppress only urllib3's InsecureRequestWarning; leave all other warnings enabled.
 import urllib3
 urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
 # Lock for thread-safe operations
 lock = threading.Lock()
 # Define the categories
 CATEGORIES = [
     "Uncategorized",
 ]
+# Set up Groq Cloud API key and base URL
+GROQ_API_KEY = os.getenv('GROQ_API_KEY')
+if not GROQ_API_KEY:
+    logger.error("GROQ_API_KEY environment variable not set.")
+openai.api_key = GROQ_API_KEY
+openai.api_base = "https://api.groq.com/openai/v1"
+# Initialize global variables for rate limiting
+api_lock = threading.Lock()
+last_api_call_time = 0
 def extract_main_content(soup):
     """
         content = soup.get_text(separator=' ', strip=True)
     # Clean up the text
+    content = re.sub(r'\s+', ' ', content)
     # Truncate content to a reasonable length (e.g., 1500 words)
     words = content.split()
 def generate_summary_and_assign_category(bookmark):
     """
+    Generate a concise summary and assign a category using a single LLM call.
     """
     logger.info(f"Generating summary and assigning category for bookmark: {bookmark.get('url')}")
+    max_retries = 3
+    retry_count = 0
+    while retry_count < max_retries:
+        try:
+            # Rate Limiting Logic
+            with api_lock:
+                global last_api_call_time
+                current_time = time.time()
+                elapsed = current_time - last_api_call_time
+                if elapsed < 2:
+                    sleep_duration = 2 - elapsed
+                    logger.info(f"Sleeping for {sleep_duration:.2f} seconds to respect rate limits.")
+                    time.sleep(sleep_duration)
+                last_api_call_time = time.time()
+            html_content = bookmark.get('html_content', '')
+            soup = BeautifulSoup(html_content, 'html.parser')
+            metadata = get_page_metadata(soup)
+            main_content = extract_main_content(soup)
+            # Prepare content for the prompt
+            content_parts = []
+            if metadata['title']:
+                content_parts.append(f"Title: {metadata['title']}")
+            if metadata['description']:
+                content_parts.append(f"Description: {metadata['description']}")
+            if metadata['keywords']:
+                content_parts.append(f"Keywords: {metadata['keywords']}")
+            if main_content:
+                content_parts.append(f"Main Content: {main_content}")
+            content_text = '\n'.join(content_parts)
+            # Detect insufficient or erroneous content
+            error_keywords = ['Access Denied', 'Security Check', 'Cloudflare', 'captcha', 'unusual traffic']
+            if not content_text or len(content_text.split()) < 50:
+                use_prior_knowledge = True
+                logger.info(f"Content for {bookmark.get('url')} is insufficient. Instructing LLM to use prior knowledge.")
+            elif any(keyword.lower() in content_text.lower() for keyword in error_keywords):
+                use_prior_knowledge = True
+                logger.info(f"Content for {bookmark.get('url')} contains error messages. Instructing LLM to use prior knowledge.")
+            else:
+                use_prior_knowledge = False
+            if use_prior_knowledge:
+                prompt = f"""
 You are a knowledgeable assistant with up-to-date information as of 2023.
 URL: {bookmark.get('url')}
 Provide:
 Summary: [Your summary]
 Category: [One category]
 """
+            else:
+                prompt = f"""
 You are an assistant that creates concise webpage summaries and assigns categories.
 Content:
 {content_text}
 Category: [One category]
 """
+            def estimate_tokens(text):
+                return len(text) / 4
+            prompt_tokens = estimate_tokens(prompt)
+            max_tokens = 150
+            total_tokens = prompt_tokens + max_tokens
+            tokens_per_minute = 40000
+            tokens_per_second = tokens_per_minute / 60
+            required_delay = total_tokens / tokens_per_second
+            sleep_time = max(required_delay, 2)
+            response = openai.ChatCompletion.create(
+                model='llama-3.1-70b-versatile',
+                messages=[
+                    {"role": "user", "content": prompt}
+                ],
+                max_tokens=int(max_tokens),
+                temperature=0.5,
+            )
+            content = response['choices'][0]['message']['content'].strip()
+            if not content:
+                raise ValueError("Empty response received from the model.")
+            summary_match = re.search(r"Summary:\s*(.*)", content)
+            category_match = re.search(r"Category:\s*(.*)", content)
+            if summary_match:
+                bookmark['summary'] = summary_match.group(1).strip()
+            else:
+                bookmark['summary'] = 'No summary available.'
+            if category_match:
+                category = category_match.group(1).strip().strip('"')
+                if category in CATEGORIES:
+                    bookmark['category'] = category
+                else:
+                    bookmark['category'] = 'Uncategorized'
             else:
                 bookmark['category'] = 'Uncategorized'
+            # Simple keyword-based validation
+            summary_lower = bookmark['summary'].lower()
+            url_lower = bookmark['url'].lower()
+            if 'social media' in summary_lower or 'twitter' in summary_lower or 'x.com' in url_lower:
+                bookmark['category'] = 'Social Media'
+            elif 'wikipedia' in url_lower:
+                bookmark['category'] = 'Reference and Knowledge Bases'
+            logger.info("Successfully generated summary and assigned category")
+            time.sleep(sleep_time)
+            break
+        except openai.error.RateLimitError as e:
+            retry_count += 1
+            wait_time = int(e.headers.get("Retry-After", 5))
+            logger.warning(f"Rate limit reached. Waiting for {wait_time} seconds before retrying... (Attempt {retry_count}/{max_retries})")
+            time.sleep(wait_time)
+        except Exception as e:
+            logger.error(f"Error generating summary and assigning category: {e}", exc_info=True)
+            bookmark['summary'] = 'No summary available.'
+            bookmark['category'] = 'Uncategorized'
+            break
 def parse_bookmarks(file_content):
     """
         content = response.text
         logger.info(f"Fetched content length for {url}: {len(content)} characters")
         if response.status_code >= 500:
             bookmark['dead_link'] = True
             bookmark['description'] = ''
             bookmark['html_content'] = ''
             logger.info(f"Fetched information for {url}")
     except requests.exceptions.Timeout:
+        bookmark['dead_link'] = False
         bookmark['etag'] = 'N/A'
         bookmark['status_code'] = 'Timeout'
         bookmark['description'] = ''
         bookmark['html_content'] = ''
+        bookmark['slow_link'] = True
         logger.warning(f"Timeout while fetching {url}. Marking as 'Slow'.")
     except Exception as e:
         bookmark['dead_link'] = True
         embeddings = embedding_model.encode(summaries)
         dimension = embeddings.shape[1]
         index = faiss.IndexIDMap(faiss.IndexFlatL2(dimension))
         ids = np.array([bookmark['id'] for bookmark in bookmarks_list], dtype=np.int64)
         index.add_with_ids(np.array(embeddings).astype('float32'), ids)
         faiss_index = index
         if bookmark.get('dead_link'):
             status = "❌ Dead Link"
             card_style = "border: 2px solid red;"
+            text_style = "color: white;"
         elif bookmark.get('slow_link'):
+            status = "⏳ Slow Response"
             card_style = "border: 2px solid orange;"
+            text_style = "color: white;"
         else:
             status = "✅ Active"
             card_style = "border: 2px solid green;"
+            text_style = "color: white;"
         title = bookmark['title']
         url = bookmark['url']
         category = bookmark.get('category', 'Uncategorized')
         # Escape HTML content to prevent XSS attacks
+        from html import escape
         title = escape(title)
         url = escape(url)
         summary = escape(summary)
     if file is None:
         logger.warning("No file uploaded")
+        return "Please upload a bookmarks HTML file.", '', state_bookmarks, display_bookmarks(), gr.update(choices=[])
     try:
         file_content = file.decode('utf-8')
     except UnicodeDecodeError as e:
         logger.error(f"Error decoding the file: {e}", exc_info=True)
+        return "Error decoding the file. Please ensure it's a valid HTML file.", '', state_bookmarks, display_bookmarks(), gr.update(choices=[])
     try:
         bookmarks = parse_bookmarks(file_content)
     except Exception as e:
         logger.error(f"Error parsing bookmarks: {e}", exc_info=True)
+        return "Error parsing the bookmarks HTML file.", '', state_bookmarks, display_bookmarks(), gr.update(choices=[])
     if not bookmarks:
         logger.warning("No bookmarks found in the uploaded file")
+        return "No bookmarks found in the uploaded file.", '', state_bookmarks, display_bookmarks(), gr.update(choices=[])
     # Assign unique IDs to bookmarks
     for idx, bookmark in enumerate(bookmarks):
     # Fetch bookmark info concurrently
     logger.info("Fetching URL info concurrently")
+    with ThreadPoolExecutor(max_workers=10) as executor:
         executor.map(fetch_url_info, bookmarks)
     # Process bookmarks concurrently with LLM calls
     logger.info("Processing bookmarks with LLM concurrently")
+    with ThreadPoolExecutor(max_workers=1) as executor:
         executor.map(generate_summary_and_assign_category, bookmarks)
     try:
         faiss_index = vectorize_and_index(bookmarks)
     except Exception as e:
         logger.error(f"Error building FAISS index: {e}", exc_info=True)
+        return "Error building search index.", '', state_bookmarks, display_bookmarks(), gr.update(choices=[])
     message = f"✅ Successfully processed {len(bookmarks)} bookmarks."
     logger.info(message)
     # Update state
     state_bookmarks = bookmarks.copy()
+    return message, bookmark_html, state_bookmarks, bookmark_html, gr.update(choices=choices)
 def delete_selected_bookmarks(selected_indices, state_bookmarks):
     """
     ids_to_delete = []
     indices_to_delete = []
     for s in selected_indices:
+        idx = int(s.split('.')[0]) - 1
+        if 0 <= idx < len(bookmarks):
+            bookmark_id = bookmarks[idx]['id']
+            ids_to_delete.append(bookmark_id)
+            indices_to_delete.append(idx)
+            logger.info(f"Deleting bookmark at index {idx + 1}")
     # Remove vectors from FAISS index
     if faiss_index is not None and ids_to_delete:
     if not new_category:
         return "⚠️ No new category selected.", gr.update(choices=[]), display_bookmarks(), state_bookmarks
+    indices = [int(s.split('.')[0])-1 for s in selected_indices]
     for idx in indices:
+        if 0 <= idx < len(bookmarks):
+            bookmarks[idx]['category'] = new_category
+            logger.info(f"Updated category for bookmark {idx + 1} to {new_category}")
     message = "✏️ Category updated for selected bookmarks."
     logger.info(message)
     """
     if not bookmarks:
         logger.warning("No bookmarks to export")
+        return None
     try:
         logger.info("Exporting bookmarks to HTML")
             dl.append(dt)
         soup.append(dl)
         html_content = str(soup)
         output_file = "exported_bookmarks.html"
         with open(output_file, 'w', encoding='utf-8') as f:
             f.write(html_content)
         logger.info("Bookmarks exported successfully")
+        return output_file
     except Exception as e:
         logger.error(f"Error exporting bookmarks: {e}", exc_info=True)
+        return None
 def chatbot_response(user_query, chat_history):
     """
+    Generate chatbot response using the FAISS index and embeddings.
     """
     if not bookmarks or faiss_index is None:
         logger.warning("No bookmarks available for chatbot")
     logger.info(f"Chatbot received query: {user_query}")
     try:
         chat_history.append({"role": "user", "content": user_query})
         with api_lock:
             global last_api_call_time
             current_time = time.time()
                 sleep_duration = 2 - elapsed
                 logger.info(f"Sleeping for {sleep_duration:.2f} seconds to respect rate limits.")
                 time.sleep(sleep_duration)
+            last_api_call_time = time.time()
         query_vector = embedding_model.encode([user_query]).astype('float32')
+        k = 5
         distances, ids = faiss_index.search(query_vector, k)
         ids = ids.flatten()
         id_to_bookmark = {bookmark['id']: bookmark for bookmark in bookmarks}
         matching_bookmarks = [id_to_bookmark.get(id) for id in ids if id in id_to_bookmark]
             chat_history.append({"role": "assistant", "content": answer})
             return chat_history
         bookmarks_info = "\n".join([
             f"Title: {bookmark['title']}\nURL: {bookmark['url']}\nSummary: {bookmark['summary']}"
             for bookmark in matching_bookmarks
         ])
         prompt = f"""
 A user asked: "{user_query}"
 Based on the bookmarks below, provide a helpful answer to the user's query, referencing the relevant bookmarks.
 Provide a concise and helpful response.
 """
+        def estimate_tokens(text):
+            return len(text) / 4
+        prompt_tokens = estimate_tokens(prompt)
+        max_tokens = 300
+        total_tokens = prompt_tokens + max_tokens
+        tokens_per_minute = 40000
+        tokens_per_second = tokens_per_minute / 60
+        required_delay = total_tokens / tokens_per_second
+        sleep_time = max(required_delay, 2)
+        response = openai.ChatCompletion.create(
+            model='llama-3.1-70b-versatile',
+            messages=[
+                {"role": "user", "content": prompt}
+            ],
+            max_tokens=int(max_tokens),
+            temperature=0.7,
+        )
+        answer = response['choices'][0]['message']['content'].strip()
         logger.info("Chatbot response generated")
+        time.sleep(sleep_time)
         chat_history.append({"role": "assistant", "content": answer})
         return chat_history
+    except openai.error.RateLimitError as e:
+        wait_time = int(e.headers.get("Retry-After", 5))
+        logger.warning(f"Rate limit reached. Waiting for {wait_time} seconds before retrying...")
+        time.sleep(wait_time)
+        return chatbot_response(user_query, chat_history)
     except Exception as e:
         error_message = f"⚠️ Error processing your query: {str(e)}"
         logger.error(error_message, exc_info=True)
             # Initialize state
             state_bookmarks = gr.State([])
             # General Overview
             gr.Markdown("""
 # 📚 SmartMarks - AI Browser Bookmarks Manager
 Navigate through the tabs to explore each feature in detail.
 """)
+            # Upload and Process Bookmarks Tab
             with gr.Tab("Upload and Process Bookmarks"):
                 gr.Markdown("""
 ## 📂 **Upload and Process Bookmarks**
 3. **View Processed Bookmarks:**
    - Once processing is complete, your bookmarks will be displayed in an organized and visually appealing format below.
 """)
                 upload = gr.File(label="📁 Upload Bookmarks HTML File", type='binary')
                 process_button = gr.Button("⚙️ Process Bookmarks")
                 output_text = gr.Textbox(label="✅ Output", interactive=False)
                 bookmark_display = gr.HTML(label="📄 Processed Bookmarks")
+            # Chat with Bookmarks Tab
             with gr.Tab("Chat with Bookmarks"):
                 gr.Markdown("""
 ## 💬 **Chat with Bookmarks**
 4. **View Chat History:**
    - All your queries and the corresponding AI responses are displayed in the chat history for your reference.
 """)
                 chatbot = gr.Chatbot(label="💬 Chat with SmartMarks", type='messages')
                 user_input = gr.Textbox(
                     label="✍️ Ask about your bookmarks",
                     outputs=chatbot
                 )
+            # Manage Bookmarks Tab
             with gr.Tab("Manage Bookmarks"):
                 gr.Markdown("""
+## 🛠️ **Manage Bookmarks**
 ### 🗂️ **Features:**
 1. **View Bookmarks:**
 6. **Refresh Bookmarks:**
    - Click the **"🔄 Refresh Bookmarks"** button to ensure the latest state is reflected in the display.
 """)
                 manage_output = gr.Textbox(label="🔄 Status", interactive=False)
+                # bookmark_selector is defined here, before the button handlers below that reference it
+                bookmark_selector = gr.CheckboxGroup(
+                    label="✅ Select Bookmarks",
+                    choices=[]
+                )
                 new_category = gr.Dropdown(
                     label="🆕 New Category",
                     choices=CATEGORIES,
                 )
                 bookmark_display_manage = gr.HTML(label="📄 Bookmarks")
                 with gr.Row():
                     delete_button = gr.Button("🗑️ Delete Selected")
                     edit_category_button = gr.Button("✏️ Edit Category")
                 download_link = gr.File(label="📥 Download Exported Bookmarks")
+                # Update process_button to use the bookmark_selector in Manage tab
+                process_button.click(
+                    process_uploaded_file,
+                    inputs=[upload, state_bookmarks],
+                    outputs=[output_text, bookmark_display, state_bookmarks, bookmark_display, bookmark_selector]
+                )
                 delete_button.click(
                     delete_selected_bookmarks,
                     inputs=[bookmark_selector, state_bookmarks],
                 refresh_button.click(
                     lambda state_bookmarks: (
                         [
+                            f"{i+1}. {bookmark['title']} (Category: {bookmark['category']})"
+                            for i, bookmark in enumerate(state_bookmarks)
                         ],
                         display_bookmarks()
                     ),
         logger.info("Launching Gradio app")
         demo.launch(debug=True)
     except Exception as e:
         logger.error(f"Error building the app: {e}", exc_info=True)
         print(f"Error building the app: {e}")
+if __name__ == "__main__":