danschnurp committed on
Commit
6c4addd
·
verified ·
1 Parent(s): ffd7939

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -7
app.py CHANGED
@@ -74,7 +74,7 @@ def build_faiss_index(dataset: pd.DataFrame) -> Tuple[faiss.IndexFlatIP, np.ndar
74
  return index
75
 
76
 
77
- def compute_correlations_faiss(index: faiss.IndexFlatIP, dataset,
78
  target_book, ) -> pd.DataFrame:
79
  print(target_book, type(target_book))
80
  emb = create_embedding([target_book[0]])
@@ -82,16 +82,31 @@ def compute_correlations_faiss(index: faiss.IndexFlatIP, dataset,
82
 
83
 
84
  # Perform the search
85
- k = len(dataset) # Search for all books
86
  similarities, I = index.search(emb.astype('float16'), k)
87
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
  corr_df = pd.DataFrame({
89
- 'book': [[dataset["Book-Title"][j] for j in list(dataset["Book-Title"])[i]] for i in I[0]],
90
- 'corr': similarities[0],
91
  })
92
  return corr_df.sort_values('corr', ascending=False)
93
 
94
 
 
95
  def load_and_prepare_data():
96
  global dataset, faiss_index, normalized_data, book_titles
97
 
@@ -127,11 +142,10 @@ def recommend_books(target_book: str, num_recommendations: int = 10) -> str:
127
  closest_match = process.extractOne(target_book, book_titles)
128
 
129
 
130
- correlations = compute_correlations_faiss(faiss_index, dataset, closest_match)
131
 
132
  recommendations = correlations[correlations['book'] != target_book].head(num_recommendations)
133
 
134
-
135
  result = f"Top {num_recommendations} recommendations for '{target_book}':\n\n"
136
  for i, (_, row) in enumerate(recommendations.iterrows(), 1):
137
  result += f"{i}. {row['book']} (Correlation: {row['corr']:.2f})\n"
@@ -141,7 +155,6 @@ def recommend_books(target_book: str, num_recommendations: int = 10) -> str:
141
 
142
  # Create Gradio interface
143
  iface = gr.Interface(
144
- theme="light",
145
  fn=recommend_books,
146
  inputs=[
147
  gr.Textbox(label="Enter a book title"),
 
74
  return index
75
 
76
 
77
+ def compute_correlations_faiss(index: faiss.IndexFlatIP, book_titles: List[str],
78
  target_book, ) -> pd.DataFrame:
79
  print(target_book, type(target_book))
80
  emb = create_embedding([target_book[0]])
 
82
 
83
 
84
  # Perform the search
85
+ k = len(book_titles) # Search for all books
86
  similarities, I = index.search(emb.astype('float16'), k)
87
 
88
+ # # Reduce database and query vectors to 2D for visualization
89
+ # pca = PCA(n_components=2)
90
+ # reduced_db = pca.fit_transform(data)
91
+ # reduced_query = pca.transform(target_vector)
92
+ #
93
+ # # Scatter plot
94
+ # plt.scatter(reduced_db[:, 0], reduced_db[:, 1], label='Database Vectors', alpha=0.5)
95
+ # plt.scatter(reduced_query[:, 0], reduced_query[:, 1], label='Query Vectors', marker='X', color='red')
96
+ # plt.legend()
97
+ # plt.title("PCA Projection of IndexFlatIP Vectors")
98
+ # plt.show()
99
+
100
+
101
+
102
  corr_df = pd.DataFrame({
103
+ 'book': [book_titles[i] for i in I[0]],
104
+ 'corr': similarities[0]
105
  })
106
  return corr_df.sort_values('corr', ascending=False)
107
 
108
 
109
+
110
  def load_and_prepare_data():
111
  global dataset, faiss_index, normalized_data, book_titles
112
 
 
142
  closest_match = process.extractOne(target_book, book_titles)
143
 
144
 
145
+ correlations = compute_correlations_faiss(faiss_index, book_titles, closest_match)
146
 
147
  recommendations = correlations[correlations['book'] != target_book].head(num_recommendations)
148
 
 
149
  result = f"Top {num_recommendations} recommendations for '{target_book}':\n\n"
150
  for i, (_, row) in enumerate(recommendations.iterrows(), 1):
151
  result += f"{i}. {row['book']} (Correlation: {row['corr']:.2f})\n"
 
155
 
156
  # Create Gradio interface
157
  iface = gr.Interface(
 
158
  fn=recommend_books,
159
  inputs=[
160
  gr.Textbox(label="Enter a book title"),