Spaces:
Sleeping
Sleeping
from typing import List | |
from modules.classify import classify, map_scores, get_first_relevant_label | |
# Candidate labels passed to the classifier; "something else" is the
# default (non-relevant) label used by the scoring helpers below.
labels = [
    "something else",
    "headlines, news channels, news articles, breaking news",
    "politics, policy and politicians",
    "health care and public health",
    "religious",
]
def handle_text_content(platform, items):
    """Build one classifier input string per item.

    For ``platform == "reddit"`` items with a truthy ``title``, the title
    and the text are joined with a newline; in every other case only
    ``item.text`` is used (``item.title`` is not accessed at all).

    Strings of five characters or fewer are replaced by the placeholder
    ``"NON-VALID"`` so that the output always has exactly one entry per
    input item, in the same order.

    Args:
        platform: Platform name; only the value ``"reddit"`` enables the
            title + text concatenation.
        items: Iterable of objects exposing ``text`` (and ``title`` when
            the platform is ``"reddit"``).

    Returns:
        list[str]: One string per item.
    """
    texts = []
    for item in items:
        # A missing body (None) is treated as empty text instead of raising
        # TypeError on concatenation / len(); such items then fall through
        # to the regular "too short" placeholder path.
        body = "" if item.text is None else item.text
        if platform == "reddit" and item.title:
            text = item.title + "\n" + body
        else:
            text = body
        # Too-short content is replaced, not dropped, to keep the output
        # aligned index-by-index with `items`.
        texts.append(text if len(text) > 5 else "NON-VALID")
    return texts
def redistribute(platform, items):
    """Classify the items' text and return a re-ordered list of their ids.

    The texts produced by ``handle_text_content`` are classified against the
    module-level ``labels``; the predictions are turned into scores with
    ``map_scores`` and the ids are re-ordered with ``distribute_evenly``.

    Args:
        platform: Platform name, forwarded to ``handle_text_content``.
        items: Sequence of objects exposing ``id`` plus the text attributes
            read by ``handle_text_content``.

    Returns:
        tuple: ``(reranked_ids, first_topic, insertion_pos)`` where the last
        two values are whatever ``get_first_relevant_label`` returns.
    """
    texts = handle_text_content(platform=platform, items=items)
    predictions = classify(texts=texts, labels=labels)
    scores = map_scores(
        predicted_labels=predictions,
        default_label="something else",
    )
    first_topic, insertion_pos = get_first_relevant_label(
        predicted_labels=predictions,
        mapped_scores=scores,
        default_label="something else",
    )
    # TODO include parent linking
    item_ids = [item.id for item in items]
    reranked_ids, _ = distribute_evenly(ids=item_ids, scores=scores)
    return reranked_ids, first_topic, insertion_pos
def distribute_evenly(ids: List[str], scores: List[float]):
    """Spread the entries with a non-zero score evenly across the list.

    Entries whose score is non-zero keep their relative order and are moved
    to evenly spaced positions starting at index 0; the zero-score entries
    fill the remaining slots, also in their original relative order. No
    entry is added or dropped.

    ``ids`` and ``scores`` are expected to be parallel lists of the same
    length (this is not validated here).

    Args:
        ids: Identifiers, one per score.
        scores: Scores aligned index-by-index with ``ids``.

    Returns:
        tuple[list, list]: ``(new_ids, new_scores)`` in the new order. When
        no score is non-zero, shallow copies of the inputs are returned.
    """
    total = len(scores)
    relevant = [i for i, score in enumerate(scores) if score != 0]
    if not relevant:
        # Nothing to spread out: hand back copies so callers can mutate them.
        return ids[:], scores[:]

    count = len(relevant)
    # A private sentinel marks free slots, so an id that happens to be None
    # is never mistaken for an empty position.
    unset = object()
    new_ids = [unset] * total
    new_scores = [0] * total

    for rank, old_pos in enumerate(relevant):
        # `rank * total // count` spaces the entries over the whole list.
        # A fixed `total // count` stride truncates to 1 whenever more than
        # half of the entries are non-zero, which would pack them all at the
        # front. Positions are strictly increasing because count <= total.
        new_pos = rank * total // count
        new_scores[new_pos] = scores[old_pos]
        new_ids[new_pos] = ids[old_pos]

    # Remaining (zero-score) ids, in original order. There are exactly
    # total - count of them, matching the number of free slots.
    moved = set(relevant)
    leftovers = (ids[i] for i in range(total) if i not in moved)
    for pos, slot in enumerate(new_ids):
        if slot is unset:
            new_ids[pos] = next(leftovers)

    return new_ids, new_scores
def insert_element_at_position(lst, element, position):
    """
    Insert ``element`` into ``lst`` at ``position``, in place.

    Args:
        lst (list): The list to modify.
        element (any): The value to insert.
        position (int): Target index; values larger than ``len(lst)`` are
            capped so the element is appended.

    Returns:
        list: The same list object that was passed in, after insertion.
    """
    # Cap the index at the end of the list (i.e. append).
    end = len(lst)
    target = position if position <= end else end
    lst.insert(target, element)
    return lst