File size: 2,812 Bytes
280d87f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import numpy as np

from .config import vectorizer
from .get_default_weight import destinations, weights_bias_vector


def get_des_accumulation(question_vector, weights_bias_vector):
    """
    Add up the weights found at the positions where the question vector is 1.

    Parameters:
    question_vector: An indexable sequence of tag flags. It is read at every
        position of weights_bias_vector, so it must be at least as long.
    weights_bias_vector: An indexable sequence of weights; its length decides
        how many positions are inspected.

    Returns:
    The sum of weights_bias_vector[i] over every i where question_vector[i] == 1,
    or 0 when no position matches.
    """
    # Positions whose flag is anything other than 1 contribute nothing.
    selected_weights = (
        weights_bias_vector[position]
        for position in range(len(weights_bias_vector))
        if question_vector[position] == 1
    )
    return sum(selected_weights, 0)

def get_destinations_list(question_vector, top_k):
    """
    Rank destinations against a question vector and return the best-scoring names.

    Every row of the module-level weights_bias_vector is scored with
    get_des_accumulation against the first row of question_vector. The top_k
    highest-scoring rows form a shortlist, and only the rows whose score is
    strictly greater than the 25th percentile of the shortlisted scores are
    kept. Each kept name is also printed together with its score.

    Parameters:
    question_vector: A 2D array-like; only question_vector[0] is used.
        Presumably the output of get_question_vector -- confirm with callers.
    top_k (int): Size of the shortlist taken before the percentile filter.

    Returns:
    destinations_list: A list of destination names ordered from highest to
        lowest score. Because of the percentile filter it usually holds fewer
        than top_k names; it is empty when there is nothing to shortlist.
    """
    tag_flags = question_vector[0]
    row_count = len(weights_bias_vector)
    scores = [
        get_des_accumulation(tag_flags, weights_bias_vector[row])
        for row in range(row_count)
    ]

    # The sort is stable, so rows with equal scores keep their original order.
    shortlisted_rows = sorted(
        range(row_count), key=scores.__getitem__, reverse=True
    )[:top_k]
    if not shortlisted_rows:
        # A percentile of an empty shortlist is meaningless; nothing to return.
        return []

    q1_score = np.percentile([scores[row] for row in shortlisted_rows], 25)

    # The first row of `destinations` is skipped, so weight row i is matched
    # with row i + 1 of the original table.
    # NOTE(review): presumably that first row is a header/placeholder -- confirm.
    names = destinations[1:].reset_index(drop=True)["name"]

    destinations_list = []
    for row in shortlisted_rows:
        # NOTE(review): the comparison is strict, so entries tied with the
        # percentile value are dropped (a one-item shortlist yields an empty
        # result) -- confirm this is intended.
        if scores[row] > q1_score:
            name = names[row]
            destinations_list.append(name)
            print(f"{name}: {scores[row]}")

    return destinations_list

def get_question_vector(question_tags):
    """
    Turn the tags of one question into a dense vector via the shared vectorizer.

    Parameters:
    question_tags: The tags associated with the question. The value is passed
        to the vectorizer as a single sample, i.e. wrapped in a one-element list.

    Returns:
    The result of vectorizer.transform(...).toarray() for that one-sample batch.
        Presumably a 2D numpy array with a single row whose length is the
        vectorizer's vocabulary size -- confirm against the vectorizer in config.
    """
    # The vectorizer is given a batch, so the single question becomes a batch of one.
    single_sample_batch = [question_tags]
    return vectorizer.transform(single_sample_batch).toarray()