socialboost / app /modules /redistribute.py
loplopez
tests on classification results
8ad2ef4
raw
history blame
3.41 kB
from typing import List
from modules.classify import classify, map_scores, get_first_relevant_label
# Candidate labels handed to `classify` in `redistribute`. "something else" is
# also the value `redistribute` passes as `default_label` to the scoring helpers.
labels = [
    "something else",
    "headlines, news channels, news articles, breaking news",
    "politics, policy and politicians",
    "health care and public health",
    "religious",
]
def handle_text_content(platform, items):
    """
    Build the list of texts to classify, one entry per item and in the same order.
    For the "reddit" platform an item with a non-empty title contributes its title
    and text joined by a newline; every other item contributes its text only.
    Any resulting text of 5 characters or fewer is replaced by the "NON-VALID" marker.
    Args:
    platform (str): Name of the platform the items come from.
    items (iterable): Objects exposing a `text` attribute (and `title` for reddit).
    Returns:
    list: One string per item.
    """
    is_reddit = platform == "reddit"
    prepared = []
    for entry in items:
        # The title is only consulted for reddit items, so other platforms
        # do not need to expose a `title` attribute at all.
        content = (
            entry.title + "\n" + entry.text
            if is_reddit and entry.title
            else entry.text
        )
        # Very short texts are swapped for a fixed marker instead of being dropped,
        # which keeps the output aligned index-by-index with `items`.
        prepared.append("NON-VALID" if len(content) <= 5 else content)
    return prepared
def redistribute(platform, items):
    """
    Classify the items' texts and compute a new ordering of their ids.
    The texts are prepared with `handle_text_content`, classified against the
    module-level `labels`, converted to scores with `map_scores`, and the ids are
    then reordered with `distribute_evenly` based on those scores.
    Args:
    platform (str): Name of the platform the items come from.
    items (list): Objects exposing `id` and `text` (and `title` for reddit).
    Returns:
    tuple: (reranked_ids, first_topic, insertion_pos), where the last two values
    are whatever `get_first_relevant_label` returns for the predictions.
    """
    fallback_label = "something else"
    texts = handle_text_content(platform=platform, items=items)
    predicted_labels = classify(texts=texts, labels=labels)
    mapped_scores = map_scores(
        predicted_labels=predicted_labels,
        default_label=fallback_label,
    )
    first_topic, insertion_pos = get_first_relevant_label(
        predicted_labels=predicted_labels,
        mapped_scores=mapped_scores,
        default_label=fallback_label,
    )
    # TODO include parent linking
    item_ids = [item.id for item in items]
    # Only the reordered ids are needed here; the reordered scores are discarded.
    reranked_ids, _ = distribute_evenly(ids=item_ids, scores=mapped_scores)
    return reranked_ids, first_topic, insertion_pos
def distribute_evenly(ids: List[str], scores: List[float]):
    """
    Reorder ids so that entries with a non-zero score are spaced at a regular interval.
    The k entries whose score is not zero are moved, in their original relative
    order, to positions 0, step, 2 * step, ... where step = len(scores) // k.
    The remaining entries keep their relative order and fill the free positions.
    The output has the same length as `scores`.
    Args:
    ids (List[str]): Identifiers, aligned index-by-index with `scores`.
    scores (List[float]): Score per identifier; zero marks a non-relevant entry.
    Returns:
    tuple: (reordered ids, reordered scores). When every score is zero, shallow
    copies of both inputs are returned unchanged.
    """
    size = len(scores)
    relevant = [idx for idx, value in enumerate(scores) if value != 0]
    if not relevant:
        # Nothing to spread out: hand back copies so callers never alias the inputs.
        return ids[:], scores[:]

    step = size // len(relevant)
    spread_scores = [0] * size
    spread_ids = [None] * size

    # Drop each relevant entry on its evenly spaced slot.
    for rank, source in enumerate(relevant):
        target = rank * step
        spread_scores[target] = scores[source]
        spread_ids[target] = ids[source]

    # Walk the source indices that were not moved above, in order, and pour
    # them into the slots that are still empty.
    moved = set(relevant)
    leftovers = (idx for idx in range(size) if idx not in moved)
    for slot in range(size):
        if spread_ids[slot] is not None:
            continue
        source = next(leftovers, None)
        if source is not None:
            spread_ids[slot] = ids[source]
    return spread_ids, spread_scores
def insert_element_at_position(lst, element, position):
    """
    Insert an element into a list at the given index, clamping past-the-end indices.
    The list is modified in place and the same list object is returned.
    Args:
    lst (list): The list to insert the element into.
    element (any): The element to insert into the list.
    position (int): The index at which to insert the element; values larger than
    the list length result in an append.
    Returns:
    list: The same list, now containing the element.
    """
    end = len(lst)
    # Anything past the end collapses to an append.
    target = end if position > end else position
    lst.insert(target, element)
    return lst