# brand-llms / app.py
# Demo: query Neuronpedia SAE feature activations for a piece of text.
import requests
import json
from typing import Dict, List
import numpy as np
def get_activation_values(text: str, feature_id: int, timeout: float = 30.0) -> Dict:
    """Fetch activation values for a single SAE feature from the Neuronpedia API.

    Args:
        text: Text to run through the model.
        feature_id: Index of the feature within the gemmascope MLP layer.
        timeout: Seconds to wait for the HTTP response. New parameter with a
            default, so existing callers are unaffected (the original had no
            timeout, which can hang indefinitely).

    Returns:
        Parsed JSON response; downstream code expects 'tokens' and 'values' keys.

    Raises:
        requests.HTTPError: If the API returns a non-2xx status.
        requests.Timeout: If the request exceeds `timeout` seconds.
    """
    url = "https://www.neuronpedia.org/api/activation/new"
    data = {
        "feature": {
            "modelId": "gemma-2-2b",
            "layer": "0-gemmascope-mlp-16k",
            "index": str(feature_id)
        },
        "customText": text
    }
    response = requests.post(
        url,
        headers={"Content-Type": "application/json"},
        json=data,
        timeout=timeout  # requests has no default timeout; without one a stalled server blocks forever
    )
    # Fail loudly on API errors instead of silently parsing an error payload.
    response.raise_for_status()
    return response.json()
def calculate_density(values: List[float], threshold: float = 0.5) -> float:
    """Return the fraction of values strictly greater than `threshold`.

    Args:
        values: Activation values, one per token.
        threshold: Cutoff above which a value counts as "active".

    Returns:
        Fraction in [0.0, 1.0]. Returns 0.0 for an empty list (the original
        raised ZeroDivisionError, which the API can trigger via an empty
        'values' payload).
    """
    if not values:
        return 0.0
    return sum(1 for v in values if v > threshold) / len(values)
def find_top_features_per_token(text: str, num_features: int = 5,
                                max_density: float = 0.01, batch_size: int = 100,
                                total_features: int = 16384) -> Dict:
    """Find the top activating features for each token, filtered by density.

    Args:
        text: Text to analyze.
        num_features: Maximum number of features to keep per token.
        max_density: Features firing on more than this fraction of tokens are
            treated as uninformative and skipped.
        batch_size: Retained for backward compatibility; the original's
            "batching" was two nested ranges iterating every id sequentially,
            so it never affected behavior.
        total_features: Number of features in the SAE layer (new parameter;
            replaces the hard-coded 16384, default unchanged).

    Returns:
        Dict mapping token string -> list of {'feature_id', 'activation',
        'density'} dicts, sorted by activation descending, truncated to
        `num_features`.

    NOTE(review): duplicate tokens in `text` collapse to a single dict key,
    merging their feature lists — preserved from the original behavior.
    """
    # One probe call just to obtain the tokenization of `text`.
    sample_activation = get_activation_values(text, 0)
    tokens = sample_activation['tokens']
    token_features = {token: [] for token in tokens}
    for feature_id in range(total_features):
        result = get_activation_values(text, feature_id)
        values = result.get('values', [])
        if not values:
            # Empty payload: nothing to record, and density would divide by zero.
            continue
        # Skip features that fire on too many tokens (likely uninformative).
        density = calculate_density(values)
        if density > max_density:
            continue
        # Record the feature against every token it activates on.
        for token, value in zip(tokens, values):
            if value > 0.5:  # activation threshold
                token_features[token].append({
                    'feature_id': feature_id,
                    'activation': value,
                    'density': density
                })
    # Keep only the strongest `num_features` features per token.
    for token in token_features:
        token_features[token].sort(key=lambda x: x['activation'], reverse=True)
        token_features[token] = token_features[token][:num_features]
    return token_features
def main() -> None:
    """Run the demo query and print per-token top features."""
    text = "Nike - Just Do It"
    token_features = find_top_features_per_token(text)
    print(f"Text: {text}\n")
    for token, features in token_features.items():
        if features:  # only show tokens with active features
            print(f"\nToken: {token}")
            for feat in features:
                print(f"  Feature {feat['feature_id']}: activation={feat['activation']:.3f}, density={feat['density']:.3%}")


# Guard the entry point so importing this module doesn't fire ~16k API calls.
if __name__ == "__main__":
    main()