Spaces:

danschnurp
/

book-recommender

Sleeping

App Files Files Community

danschnurp committed on Dec 8, 2024

Commit

6c4addd

verified ·

1 Parent(s): ffd7939

Create app.py

Browse files

Files changed (1) hide show

app.py +20 -7

app.py CHANGED Viewed

@@ -74,7 +74,7 @@ def build_faiss_index(dataset: pd.DataFrame) -> Tuple[faiss.IndexFlatIP, np.ndar
     return index
-def compute_correlations_faiss(index: faiss.IndexFlatIP, dataset,
                                target_book, ) -> pd.DataFrame:
     print(target_book, type(target_book))
     emb = create_embedding([target_book[0]])
@@ -82,16 +82,31 @@ def compute_correlations_faiss(index: faiss.IndexFlatIP, dataset,
     # Perform the search
-    k = len(dataset)  # Search for all books
     similarities, I = index.search(emb.astype('float16'), k)
     corr_df = pd.DataFrame({
-        'book': [[dataset["Book-Title"][j] for j in list(dataset["Book-Title"])[i]] for i in I[0]],
-        'corr': similarities[0],
     })
     return corr_df.sort_values('corr', ascending=False)
 def load_and_prepare_data():
     global dataset, faiss_index, normalized_data, book_titles
@@ -127,11 +142,10 @@ def recommend_books(target_book: str, num_recommendations: int = 10) -> str:
     closest_match = process.extractOne(target_book, book_titles)
-    correlations = compute_correlations_faiss(faiss_index, dataset, closest_match)
     recommendations = correlations[correlations['book'] != target_book].head(num_recommendations)
     result = f"Top {num_recommendations} recommendations for '{target_book}':\n\n"
     for i, (_, row) in enumerate(recommendations.iterrows(), 1):
         result += f"{i}. {row['book']} (Correlation: {row['corr']:.2f})\n"
@@ -141,7 +155,6 @@ def recommend_books(target_book: str, num_recommendations: int = 10) -> str:
 # Create Gradio interface
 iface = gr.Interface(
-    theme="light",
     fn=recommend_books,
     inputs=[
         gr.Textbox(label="Enter a book title"),

     return index
+def compute_correlations_faiss(index: faiss.IndexFlatIP, book_titles: List[str],
                                target_book, ) -> pd.DataFrame:
     print(target_book, type(target_book))
     emb = create_embedding([target_book[0]])
     # Perform the search
+    k = len(book_titles)  # Search for all books
     similarities, I = index.search(emb.astype('float16'), k)
+    # # Reduce database and query vectors to 2D for visualization
+    # pca = PCA(n_components=2)
+    # reduced_db = pca.fit_transform(data)
+    # reduced_query = pca.transform(target_vector)
+    #
+    # # Scatter plot
+    # plt.scatter(reduced_db[:, 0], reduced_db[:, 1], label='Database Vectors', alpha=0.5)
+    # plt.scatter(reduced_query[:, 0], reduced_query[:, 1], label='Query Vectors', marker='X', color='red')
+    # plt.legend()
+    # plt.title("PCA Projection of IndexFlatIP Vectors")
+    # plt.show()
     corr_df = pd.DataFrame({
+        'book': [book_titles[i] for i in I[0]],
+        'corr': similarities[0]
     })
     return corr_df.sort_values('corr', ascending=False)
 def load_and_prepare_data():
     global dataset, faiss_index, normalized_data, book_titles
     closest_match = process.extractOne(target_book, book_titles)
+    correlations = compute_correlations_faiss(faiss_index, book_titles, closest_match)
     recommendations = correlations[correlations['book'] != target_book].head(num_recommendations)
     result = f"Top {num_recommendations} recommendations for '{target_book}':\n\n"
     for i, (_, row) in enumerate(recommendations.iterrows(), 1):
         result += f"{i}. {row['book']} (Correlation: {row['corr']:.2f})\n"
 # Create Gradio interface
 iface = gr.Interface(
     fn=recommend_books,
     inputs=[
         gr.Textbox(label="Enter a book title"),