File size: 4,719 Bytes
ea8c682
 
 
 
 
 
 
 
1dea137
ea8c682
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26b74d2
 
 
ea8c682
1dea137
ea8c682
 
 
1dea137
 
ea8c682
1dea137
 
 
ea8c682
1dea137
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e54660d
1dea137
 
e54660d
 
 
 
1dea137
e54660d
 
 
 
 
 
 
 
1dea137
 
ea8c682
1dea137
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
import streamlit as st
import pandas as pd
import os
from huggingface_hub import hf_hub_download


# Page configuration: browser-tab title/icon, centered layout, sidebar
# collapsed on first load.
_PAGE_SETTINGS = {
    "page_title": "Title-2-BookName",
    "page_icon": "📚",
    "layout": "centered",
    "initial_sidebar_state": "collapsed",
}
st.set_page_config(**_PAGE_SETTINGS)

# Custom stylesheet injected into the page as raw HTML.
# NOTE(review): `.st-emotion-cache-1n76uvr` looks like an auto-generated
# Streamlit class name and may not be stable across versions — confirm.
_CUSTOM_CSS = """
    <style>
    .main {
        padding: 2rem;
    }
    .stApp {
        background-color: #f5f7fa;
    }
    .stTitle {
        font-size: 3rem !important;
        margin-bottom: 1.5rem !important;
    }
    .search-container {
        background-color: white;
        padding: 2rem;
        border-radius: 10px;
        box-shadow: 0 2px 10px rgba(0,0,0,0.1);
    }
    .results-container {
        margin-top: 2rem;
        background-color: white;
        padding: 2rem;
        border-radius: 10px;
        box-shadow: 0 2px 10px rgba(0,0,0,0.1);
    }
    .st-emotion-cache-1n76uvr {
        font-size: 1.2rem;
    }
    </style>
"""
# unsafe_allow_html is needed so the <style> tag is emitted as markup.
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)

# Load the CSV file with caching
@st.cache_data
def _download_title_data(token):
    """Download the title CSV from the Hugging Face Hub and parse it.

    Only this helper is cached. It raises on failure instead of returning a
    fallback value, so a failed download is never memoised and the next
    rerun tries again.

    Args:
        token: Hugging Face access token used to authenticate the download.

    Returns:
        The CSV contents as a DataFrame.
    """
    csv_path = hf_hub_download(
        repo_id="Skoob/test-private",
        filename="2025Feb_3_Data_2k_267k.csv",
        # `token` is the current name of the deprecated `use_auth_token`.
        token=token,
    )
    return pd.read_csv(csv_path)


def load_data():
    """Return the title table, or an empty DataFrame when it cannot be loaded.

    The access token is read from the ``HF_TOKEN`` environment variable.
    Any problem (missing token, download or parse error) is reported in the
    sidebar and an empty DataFrame is returned so the caller can keep
    running. Error handling deliberately lives outside the cached helper so
    that failures are not cached.

    Returns:
        The loaded DataFrame, or ``pd.DataFrame()`` on any failure.
    """
    hf_token = os.getenv("HF_TOKEN")
    if not hf_token:
        st.sidebar.error("HF_TOKEN environment variable is not set")
        return pd.DataFrame()

    try:
        return _download_title_data(hf_token)
    except Exception as e:  # UI boundary: surface the error, keep the app up
        st.sidebar.error(f"Error loading data: {e}")
        return pd.DataFrame()

# Page heading.
st.title(":blue[Title-2-BookName]")

# Load data
# Module-level table that get_file_info() searches; load_data() returns an
# empty DataFrame when the data could not be loaded.
title_df = load_data()

# Main search functionality
def get_file_info(user_title):
    """Look up a book title in the module-level ``title_df`` table.

    The comparison against the ``Title`` column is an exact, case-insensitive
    match (both sides are lower-cased).

    Args:
        user_title: Title text to search for.

    Returns:
        A DataFrame with the columns ``Title``, ``Clean_name`` and
        ``File_name`` and a fresh 0-based index, where ``Clean_name`` is
        ``File_name`` with the literal substrings ``"BookSummary-"`` and
        ``".docx"`` removed. Returns ``None`` when the table is empty, the
        title is empty, or nothing matches.
    """
    if title_df.empty or not user_title:
        return None

    # Always case-insensitive search with exact match only
    wanted = user_title.lower()
    matches = title_df.loc[title_df['Title'].str.lower() == wanted]
    if matches.empty:
        return None

    # Work on a copy so the added column never touches title_df.
    matches = matches.copy()
    # regex=False: treat both patterns literally. With regex matching the
    # "." in ".docx" is a wildcard and would also strip e.g. "_docx".
    matches['Clean_name'] = (
        matches['File_name']
        .str.replace("BookSummary-", "", regex=False)
        .str.replace(".docx", "", regex=False)
    )
    # Select relevant columns and reset index
    return matches[['Title', 'Clean_name', 'File_name']].reset_index(drop=True)

# Main app container
col1 = st.container()  # Single container holding both the input box and the button

with col1:
    # One title per line; several titles can be searched in one go.
    user_titles = st.text_area("Enter book titles (one per line):")
    search_button = st.button("🔍 Search", use_container_width=True)

# Search results display
if search_button:
    # Keep only non-blank lines so empty or whitespace-only input is reported
    # as "nothing entered" rather than searched as an empty title.
    stripped_lines = (line.strip() for line in (user_titles or "").splitlines())
    titles_list = [title for title in stripped_lines if title]

    if titles_list:
        # Collect per-title matches and concatenate once after the loop.
        matched_frames = []
        with st.spinner("Searching..."):
            for title in titles_list:
                result_df = get_file_info(title)
                if result_df is not None and not result_df.empty:
                    matched_frames.append(result_df)

        if matched_frames:
            all_results = pd.concat(matched_frames, ignore_index=True)
        else:
            all_results = pd.DataFrame(columns=['Title', 'Clean_name', 'File_name'])

        if not all_results.empty:
            st.success(f"Found {len(all_results)} matching result(s)!")

            # Display results in a table with friendlier column headers
            st.dataframe(
                all_results,
                column_config={
                    "Title": st.column_config.TextColumn("Original Title"),
                    "Clean_name": st.column_config.TextColumn("Book Name"),
                    "File_name": st.column_config.TextColumn("File Name")
                },
                use_container_width=True,
                hide_index=False
            )
        else:
            st.warning("No results found. Try different search options or check your spelling.")
    else:
        st.warning("Please enter at least one book title.")