File size: 4,449 Bytes
209ff0f
 
81972f4
 
209ff0f
 
 
 
 
81972f4
 
 
 
 
 
 
 
 
209ff0f
81972f4
 
 
9bd0f62
81972f4
9bd0f62
81972f4
 
 
 
 
 
9bd0f62
 
 
 
 
 
81972f4
9bd0f62
 
 
209ff0f
 
9bd0f62
edd7db1
 
81972f4
6dc7c2d
81972f4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6dc7c2d
 
81972f4
edd7db1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
209ff0f
9bd0f62
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
import html
from collections import Counter
from urllib.parse import urlparse

import streamlit as st
from bs4 import BeautifulSoup

def clean_bookmarks(html_content):
    """Group the links of a bookmark export by domain and render them.

    Every ``<a>`` element with a non-empty ``href`` is collected and grouped
    by the network location (``netloc``) of its URL. Domains are emitted from
    most to fewest links (ties keep first-seen order) and the links inside
    each domain are sorted by their anchor text.

    Anchors without an ``href`` are skipped. A link whose anchor text is
    empty is labelled with its URL so it stays visible in the output. URLs
    that ``urlparse`` rejects are grouped under the empty domain instead of
    aborting the whole run.

    Args:
        html_content: Bookmark HTML as text.

    Returns:
        A ``(cleaned_html, cleaned_markdown)`` tuple of strings. The HTML
        output escapes domains, URLs and anchor text; the Markdown output
        inserts them verbatim.
    """
    soup = BeautifulSoup(html_content, 'html.parser')

    # domain -> [(url, anchor_text), ...]; a dict keeps first-seen order,
    # which the stable sort below relies on for tie-breaking.
    links_by_domain = {}
    for link in soup.find_all('a'):
        url = link.get('href')
        if not url:
            # No target to link to; passing None on would end up as a
            # literal "None" href in the output.
            continue
        try:
            domain = urlparse(url).netloc
        except ValueError:
            # urlparse raises on malformed hosts (e.g. an unbalanced "[").
            domain = ''
        anchor_name = link.text.strip() or url
        links_by_domain.setdefault(domain, []).append((url, anchor_name))

    # Most-bookmarked domains first.
    sorted_domains = sorted(
        links_by_domain.items(),
        key=lambda item: len(item[1]),
        reverse=True,
    )

    html_parts = ['<html><body>\n']
    markdown_parts = []
    for domain, url_anchors in sorted_domains:
        html_parts.append(f'<h2>{html.escape(domain)}</h2>\n')
        markdown_parts.append(f'## {domain}\n')
        for url, anchor_name in sorted(url_anchors, key=lambda pair: pair[1]):
            # Escape on output: the parser hands back decoded attribute and
            # text values, so raw <, & or " would corrupt the markup.
            html_parts.append(
                f'<a href="{html.escape(url, quote=True)}">'
                f'{html.escape(anchor_name)}</a><br>\n'
            )
            markdown_parts.append(f'[{anchor_name}]({url})\n')
        html_parts.append('<br>\n')
        markdown_parts.append('\n')
    html_parts.append('</body></html>')

    return ''.join(html_parts), ''.join(markdown_parts)
def Instructions():
    """Render the how-to guide for exporting, curating and importing bookmarks.

    Takes no arguments and returns nothing; the guide is a fixed Markdown
    string passed to ``st.markdown``.
    """
    # Blank lines separate each bold heading, each numbered list and the
    # closing paragraph. Without them Markdown treats a line that directly
    # follows a list item as a continuation of that item, so the headings
    # were swallowed and the three lists ran together as one.
    instructions = '''
    To export your Google Chrome bookmarks, including those on the bookmark bar, and curate the list, follow these steps:

    **Export bookmarks:**

    1. Open Google Chrome and click on the three-dot menu icon in the top-right corner.
    2. Go to "Bookmarks" > "Bookmark manager" or press Ctrl+Shift+O (Windows) or Cmd+Option+B (Mac).
    3. In the Bookmark Manager, click on the three-dot menu icon and select "Export bookmarks."
    4. Choose a location to save the HTML file containing your bookmarks and click "Save."

    **Curate the bookmarks:**

    1. Open the exported HTML file in a text editor like Notepad++ (Windows) or TextEdit (Mac).
    2. Locate the section containing your bookmarks. It will be enclosed within `<DL><p>` tags.
    3. Find the bookmark bar section, which is usually labeled with `<DT><H3 ADD_DATE="..." LAST_MODIFIED="...">Bookmarks bar</H3>`.
    4. Delete any unwanted bookmarks by removing the entire `<DT><A HREF="...">...</A>` line corresponding to that bookmark.
    5. Organize the remaining bookmarks by moving the `<DT><A HREF="...">...</A>` lines within the bookmark bar section.
    6. Save the edited HTML file.

    **Import the curated bookmarks:**

    1. In Google Chrome, open the Bookmark Manager again.
    2. Click on the three-dot menu icon and select "Import bookmarks."
    3. Choose the edited HTML file you saved in step 2 and click "Open."
    4. Your curated bookmarks will now be imported into Chrome, replacing the previous set of bookmarks.

    By following these steps, you can export your Google Chrome bookmarks, curate the list by removing unwanted bookmarks and organizing the remaining ones, and then import the curated list back into Chrome. This process allows you to keep your bookmark bar clean and organized with the bookmarks you use daily.
    '''
    st.markdown(instructions)

    
def main():
    """Run the Streamlit page: upload a bookmark file and show the cleaned result.

    Shows the title and instructions, then waits for an uploaded HTML file.
    Once a file is provided it is decoded as UTF-8, passed to
    ``clean_bookmarks``, and the resulting HTML and Markdown are displayed in
    text areas. The HTML is also written to ``cleaned_bookmarks.html`` in the
    working directory and offered through a download button, and the Markdown
    is rendered on the page.
    """
    st.title('Bookmark File Cleaner')
    Instructions()

    uploaded_file = st.file_uploader('Choose an HTML bookmark file', type=['html'])
    if uploaded_file is None:
        return

    try:
        html_content = uploaded_file.read().decode('utf-8')
    except UnicodeDecodeError:
        # Report the problem in the page instead of crashing with a traceback.
        st.error('The uploaded file could not be decoded as UTF-8 text.')
        return

    cleaned_html, cleaned_markdown = clean_bookmarks(html_content)

    st.subheader('Cleaned Bookmarks')
    st.text_area('Output HTML', value=cleaned_html, height=400)
    st.text_area('Output Markdown', value=cleaned_markdown, height=400)

    output_file = 'cleaned_bookmarks.html'
    # Explicit encoding: the content came from UTF-8 input and may hold
    # characters the platform's default encoding cannot represent.
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(cleaned_html)

    st.download_button(
        'Download Cleaned Bookmarks',
        cleaned_html,
        file_name=output_file,
        mime='text/html',
    )

    st.markdown(cleaned_markdown)


# Start the app only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()