Spaces:
Sleeping
Sleeping
Create app.py
Browse files
app.py
CHANGED
@@ -74,7 +74,7 @@ def build_faiss_index(dataset: pd.DataFrame) -> Tuple[faiss.IndexFlatIP, np.ndar
|
|
74 |
return index
|
75 |
|
76 |
|
77 |
-
def compute_correlations_faiss(index: faiss.IndexFlatIP,
|
78 |
target_book, ) -> pd.DataFrame:
|
79 |
print(target_book, type(target_book))
|
80 |
emb = create_embedding([target_book[0]])
|
@@ -82,16 +82,31 @@ def compute_correlations_faiss(index: faiss.IndexFlatIP, dataset,
|
|
82 |
|
83 |
|
84 |
# Perform the search
|
85 |
-
k = len(
|
86 |
similarities, I = index.search(emb.astype('float16'), k)
|
87 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
88 |
corr_df = pd.DataFrame({
|
89 |
-
'book': [[
|
90 |
-
'corr': similarities[0]
|
91 |
})
|
92 |
return corr_df.sort_values('corr', ascending=False)
|
93 |
|
94 |
|
|
|
95 |
def load_and_prepare_data():
|
96 |
global dataset, faiss_index, normalized_data, book_titles
|
97 |
|
@@ -127,11 +142,10 @@ def recommend_books(target_book: str, num_recommendations: int = 10) -> str:
|
|
127 |
closest_match = process.extractOne(target_book, book_titles)
|
128 |
|
129 |
|
130 |
-
correlations = compute_correlations_faiss(faiss_index,
|
131 |
|
132 |
recommendations = correlations[correlations['book'] != target_book].head(num_recommendations)
|
133 |
|
134 |
-
|
135 |
result = f"Top {num_recommendations} recommendations for '{target_book}':\n\n"
|
136 |
for i, (_, row) in enumerate(recommendations.iterrows(), 1):
|
137 |
result += f"{i}. {row['book']} (Correlation: {row['corr']:.2f})\n"
|
@@ -141,7 +155,6 @@ def recommend_books(target_book: str, num_recommendations: int = 10) -> str:
|
|
141 |
|
142 |
# Create Gradio interface
|
143 |
iface = gr.Interface(
|
144 |
-
theme="light",
|
145 |
fn=recommend_books,
|
146 |
inputs=[
|
147 |
gr.Textbox(label="Enter a book title"),
|
|
|
74 |
return index
|
75 |
|
76 |
|
77 |
+
def compute_correlations_faiss(index: faiss.IndexFlatIP, book_titles: List[str],
|
78 |
target_book, ) -> pd.DataFrame:
|
79 |
print(target_book, type(target_book))
|
80 |
emb = create_embedding([target_book[0]])
|
|
|
82 |
|
83 |
|
84 |
# Perform the search
|
85 |
+
k = len(book_titles) # Search for all books
|
86 |
similarities, I = index.search(emb.astype('float16'), k)
|
87 |
|
88 |
+
# # Reduce database and query vectors to 2D for visualization
|
89 |
+
# pca = PCA(n_components=2)
|
90 |
+
# reduced_db = pca.fit_transform(data)
|
91 |
+
# reduced_query = pca.transform(target_vector)
|
92 |
+
#
|
93 |
+
# # Scatter plot
|
94 |
+
# plt.scatter(reduced_db[:, 0], reduced_db[:, 1], label='Database Vectors', alpha=0.5)
|
95 |
+
# plt.scatter(reduced_query[:, 0], reduced_query[:, 1], label='Query Vectors', marker='X', color='red')
|
96 |
+
# plt.legend()
|
97 |
+
# plt.title("PCA Projection of IndexFlatIP Vectors")
|
98 |
+
# plt.show()
|
99 |
+
|
100 |
+
|
101 |
+
|
102 |
corr_df = pd.DataFrame({
|
103 |
+
'book': [book_titles[i] for i in I[0]],
|
104 |
+
'corr': similarities[0]
|
105 |
})
|
106 |
return corr_df.sort_values('corr', ascending=False)
|
107 |
|
108 |
|
109 |
+
|
110 |
def load_and_prepare_data():
|
111 |
global dataset, faiss_index, normalized_data, book_titles
|
112 |
|
|
|
142 |
closest_match = process.extractOne(target_book, book_titles)
|
143 |
|
144 |
|
145 |
+
correlations = compute_correlations_faiss(faiss_index, book_titles, closest_match)
|
146 |
|
147 |
recommendations = correlations[correlations['book'] != target_book].head(num_recommendations)
|
148 |
|
|
|
149 |
result = f"Top {num_recommendations} recommendations for '{target_book}':\n\n"
|
150 |
for i, (_, row) in enumerate(recommendations.iterrows(), 1):
|
151 |
result += f"{i}. {row['book']} (Correlation: {row['corr']:.2f})\n"
|
|
|
155 |
|
156 |
# Create Gradio interface
|
157 |
iface = gr.Interface(
|
|
|
158 |
fn=recommend_books,
|
159 |
inputs=[
|
160 |
gr.Textbox(label="Enter a book title"),
|