TheFrenchDemos's picture
implemented core generation + detection
f747801
import random
import numpy as np
from ..core.detector import WmDetector
def generate_pastel_color():
    """Return a random pastel color as an ``hsl(...)`` string.

    Three values are drawn from ``random.random()``, in this order:
    hue (scaled to 0-360), saturation (30%-50%) and lightness (80%-90%).
    """
    hue_fraction = random.random()
    saturation = 0.3 + random.random() * 0.2
    lightness = 0.8 + random.random() * 0.1

    # Scale the unit-interval values to degrees / percentages for the string.
    hue_degrees = hue_fraction * 360
    saturation_pct = saturation * 100
    lightness_pct = lightness * 100
    return "hsl({}, {}%, {}%)".format(hue_degrees, saturation_pct, lightness_pct)
def color_from_score(score: float):
    """Map a score in [0, 1] to a pastel ``hsl(...)`` color string.

    Scores below 0.5 are rendered red (hue 0) and scores of 0.5 or more are
    rendered green (hue 120). Saturation grows from 30% at 0.5 to 50% at the
    extremes 0 and 1; lightness is fixed at 85%.

    Args:
        score: The score to colorize. Python ``int``/``float`` values and
            numpy integer/floating scalars are accepted.

    Returns:
        The color string, or the pastel gray ``"hsl(0, 0%, 85%)"`` when the
        score is NaN or is not a real number (e.g. ``None``).
    """
    gray = "hsl(0, 0%, 85%)"  # Pastel gray for NaN / missing scores

    # Accept every real numeric scalar, not only ``float``: an integer score
    # of 0 or 1 (or a numpy float32) is a valid score and must not fall
    # through to gray. ``bool`` is an ``int`` subclass but is not a score.
    numeric_types = (int, float, np.integer, np.floating)
    if isinstance(score, bool) or not isinstance(score, numeric_types):
        return gray

    score = float(score)
    if np.isnan(score):
        return gray

    hue = 0 if score < 0.5 else 120  # 0 = red, 120 = green
    saturation = 0.3 + 0.2 * abs(2 * score - 1)  # More saturated near 0 and 1
    lightness = 0.85  # Constant lightness keeps the color pastel
    return f"hsl({hue}, {saturation * 100}%, {lightness * 100}%)"
def get_token_details(
    text: str,
    detector: WmDetector
) -> list:
    """
    Run the detector on the text and output everything needed for display.

    Args:
        text: The text to analyze.
        detector: Detector providing ``get_details(text)`` and
            ``get_pvalues_by_tok(token_details)``.

    Returns:
        A list of dicts. There is one entry per (token detail, p-value) pair
        with the keys ``is_scored``, ``token``, ``color``, ``score`` and
        ``pvalue``, followed by one final summary entry with the keys
        ``final_score``, ``ntoks_scored`` and ``final_pvalue``.
    """
    # Get scores for each token
    token_details = detector.get_details(text)
    # Get p-values for each token, plus the aggregate statistics
    pvalues, aux_info = detector.get_pvalues_by_tok(token_details)

    numpy_scalar_types = (np.floating, np.integer)
    display_info = []
    # NOTE(review): zip() stops at the shorter sequence; this assumes the
    # detector returns one p-value per token detail -- confirm.
    for token_detail, pvalue in zip(token_details, pvalues):
        is_scored = token_detail['is_scored']
        # Tokens that were not scored are represented by NaN
        score = token_detail['score'] if is_scored else float('nan')

        # Convert numpy types to native Python types
        if isinstance(score, numpy_scalar_types):
            score = float(score)
        if isinstance(pvalue, numpy_scalar_types):
            pvalue = float(pvalue)

        display_info.append({
            'is_scored': is_scored,
            'token': token_detail['token_text'],
            'color': color_from_score(score),
            'score': score,
            'pvalue': pvalue
        })

    # Add summary statistics, converted to native Python types
    display_info.append({
        'final_score': float(aux_info['final_score']),
        'ntoks_scored': int(aux_info['ntoks_scored']),
        'final_pvalue': float(aux_info['final_pvalue'])
    })
    return display_info
def template_prompt(instruction: str, prompt_type: str = "smollm") -> str:
    """Wrap an instruction in the prompt format expected by the model.

    Args:
        instruction: The raw prompt/instruction to template
        prompt_type: Type of prompt format (smollm, alpaca)
    Returns:
        The formatted prompt ready for the model. For "alpaca" the
        instruction is returned unchanged.
    Raises:
        ValueError: If ``prompt_type`` is not one of the supported formats.
    """
    # Alpaca prompts are passed through as-is.
    if prompt_type == "alpaca":
        return instruction

    if prompt_type != "smollm":
        raise ValueError(f"Prompt type {prompt_type} not supported")

    # SmolLM chat layout: system turn, user turn, then an open assistant turn.
    system_turn = "<|im_start|>system\nYou are a helpful AI assistant named SmolLM, trained by Hugging Face<|im_end|>\n"
    user_turn = f"<|im_start|>user\n{instruction}<|im_end|>\n<|im_start|>assistant\n"
    return system_turn + user_turn