Tesneem committed on
Commit
3d14986
·
verified ·
1 Parent(s): 713f32a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -3
app.py CHANGED
@@ -11,7 +11,10 @@ embedding_model = SentenceTransformer("thenlper/gte-large")
11
 
12
  # Example dataset with genres (replace with your actual data)
13
  dataset = load_dataset("hugginglearners/netflix-shows")
 
14
 
 
 
15
  # Combine description and genre for embedding
16
  def combine_description_title_and_genre(description, listed_in, title):
17
  return f"{description} Genre: {listed_in} Title: {title}"
@@ -25,19 +28,19 @@ def vector_search(query):
25
  query_embedding = get_embedding(query)
26
 
27
  # Generate embeddings for the combined description and genre
28
- embeddings = np.array([get_embedding(combine_description_title_and_genre(item["description"], item["listed_in"],item["title"])) for item in dataset])
29
 
30
  # Calculate cosine similarity between the query and all embeddings
31
  similarities = cosine_similarity([query_embedding], embeddings)
32
 
33
  # Adjust similarity scores based on ratings
34
- ratings = np.array([item["rating"] for item in dataset])
35
  adjusted_similarities = similarities * ratings.reshape(-1, 1)
36
 
37
  # Get top N most similar items (e.g., top 3)
38
  top_n = 3
39
  top_indices = adjusted_similarities[0].argsort()[-top_n:][::-1] # Get indices of the top N results
40
- top_items = [dataset[i] for i in top_indices]
41
 
42
  # Format the output for display
43
  search_result = ""
 
11
 
12
  # Example dataset with genres (replace with your actual data)
13
  dataset = load_dataset("hugginglearners/netflix-shows")
14
+ data = dataset['train'] # Accessing the 'train' split of the dataset
15
 
16
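+ # Convert the dataset to plain Python data for easier indexing (NOTE: Dataset.to_dict() returns a dict of column lists, not a list of row dicts; list(data) would yield one dict per row)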
17
+ data_list = data.to_dict()
18
  # Combine description and genre for embedding
19
  def combine_description_title_and_genre(description, listed_in, title):
20
  return f"{description} Genre: {listed_in} Title: {title}"
 
28
  query_embedding = get_embedding(query)
29
 
30
  # Generate embeddings for the combined description and genre
31
+ embeddings = np.array([get_embedding(combine_description_title_and_genre(item["description"], item["listed_in"],item["title"])) for item in data_list])
32
 
33
  # Calculate cosine similarity between the query and all embeddings
34
  similarities = cosine_similarity([query_embedding], embeddings)
35
 
36
  # Adjust similarity scores based on ratings
37
+ ratings = np.array([item["rating"] for item in data_list])
38
  adjusted_similarities = similarities * ratings.reshape(-1, 1)
39
 
40
  # Get top N most similar items (e.g., top 3)
41
  top_n = 3
42
  top_indices = adjusted_similarities[0].argsort()[-top_n:][::-1] # Get indices of the top N results
43
+ top_items = [data_list[i] for i in top_indices]
44
 
45
  # Format the output for display
46
  search_result = ""