File size: 3,408 Bytes
3b86501
 
 
 
 
 
c8df78e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3b86501
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
from typing import List
from modules.classify import classify, map_scores, get_first_relevant_label

# Candidate labels passed to classify(); "something else" is also the label
# that redistribute() passes as default_label to the scoring helpers.
labels = [
    "something else",
    "headlines, news channels, news articles, breaking news",
    "politics, policy and politicians",
    "health care and public health",
    "religious",
]


def handle_text_content(platform, items):
    """Build one text string per item, in the same order as ``items``.

    For the ``"reddit"`` platform, an item with a non-empty title yields
    ``title + "\\n" + text``; every other item yields its ``text`` alone.
    Any resulting string of five characters or fewer is replaced by the
    placeholder ``"NON-VALID"``.

    Args:
        platform: Platform name; only ``"reddit"`` enables title handling.
        items: Iterable of objects exposing a ``text`` attribute and, for
            the ``"reddit"`` platform, a ``title`` attribute.

    Returns:
        list: One string per item.
    """
    texts = []
    for item in items:
        # A missing body (None) is treated as empty so that neither the
        # concatenation nor the length check below raises TypeError.
        body = item.text if item.text is not None else ""

        # ``title`` is only read for reddit items, as other platforms'
        # items are not required to carry that attribute.
        if platform == "reddit" and item.title:
            text = item.title + "\n" + body
        else:
            text = body

        # Very short texts are swapped for a fixed placeholder.
        if len(text) <= 5:
            text = "NON-VALID"

        texts.append(text)
    return texts


def redistribute(platform, items):
    """Classify the items and return their ids in a redistributed order.

    The item texts (see handle_text_content) are classified against the
    module-level ``labels``; the predictions are turned into scores by
    map_scores, and the item ids are reordered by distribute_evenly using
    those scores.

    Returns:
        tuple: ``(reranked_ids, first_topic, insertion_pos)`` where the last
        two values are whatever get_first_relevant_label returns.
    """
    default_label = "something else"

    texts = handle_text_content(platform=platform, items=items)
    predictions = classify(texts=texts, labels=labels)
    item_scores = map_scores(
        predicted_labels=predictions,
        default_label=default_label,
    )
    first_topic, insertion_pos = get_first_relevant_label(
        predicted_labels=predictions,
        mapped_scores=item_scores,
        default_label=default_label,
    )
    # TODO include parent linking
    item_ids = [item.id for item in items]
    # Only the reordered ids are needed; the reordered scores are discarded.
    reranked_ids, _ = distribute_evenly(ids=item_ids, scores=item_scores)
    return reranked_ids, first_topic, insertion_pos


def distribute_evenly(ids: List[str], scores: List[float]):
    """
    Spread the entries that have a non-zero score at regular intervals.

    With ``n = len(scores)`` and ``k`` non-zero scores, the j-th non-zero
    entry (in original order, counting from 0) is moved to index
    ``j * (n // k)``. All other ids keep their relative order and fill the
    remaining slots. Returns ``(new_ids, new_scores)``; ``new_scores`` holds
    0 in every slot that did not receive a relocated entry. When no score is
    non-zero, slices (copies) of both inputs are returned unchanged.
    """
    slot_count = len(scores)
    scored_sources = [idx for idx, value in enumerate(scores) if value != 0]

    if not scored_sources:
        return ids[:], scores[:]

    stride = slot_count // len(scored_sources)
    spread_scores = [0] * slot_count
    ordering = [None] * slot_count

    # Drop each scored entry onto its evenly spaced target slot.
    for rank, source in enumerate(scored_sources):
        target = rank * stride
        spread_scores[target] = scores[source]
        ordering[target] = ids[source]

    # Source indices that were not relocated, in ascending order.
    relocated = set(scored_sources)
    leftover_sources = (
        idx for idx in range(slot_count) if idx not in relocated
    )

    # Every slot still holding None is a gap; fill gaps left to right with
    # the leftover ids. A gap stays None if the leftovers run out.
    for slot in range(slot_count):
        if ordering[slot] is not None:
            continue
        source = next(leftover_sources, None)
        if source is not None:
            ordering[slot] = ids[source]

    return ordering, spread_scores


def insert_element_at_position(lst, element, position):
    """
    Insert ``element`` into ``lst`` at ``position`` and return the list.

    The list is modified in place; the returned object is the same list
    that was passed in.

    Args:
    lst (list): The list that receives the element.
    element (any): The value to insert.
    position (int): Target index. Values greater than ``len(lst)`` are
        capped at ``len(lst)``, which appends the element.

    Returns:
    list: ``lst`` itself, now containing ``element``.
    """
    # Cap the index at the list length so oversized positions append.
    capped_index = min(position, len(lst))
    lst.insert(capped_index, element)
    return lst