# Neuronpedia feature-activation explorer for gemma-2-2b (layer 0, gemmascope-mlp-16k).
import requests | |
import json | |
from typing import Dict, List | |
import numpy as np | |
def get_activation_values(text: str, feature_id: int, timeout: float = 30.0) -> Dict:
    """Fetch per-token activation values for one SAE feature from Neuronpedia.

    Args:
        text: Text to run through the model.
        feature_id: Feature index within the layer (0-16383 for a 16k SAE).
        timeout: Seconds to wait for the HTTP response before raising.

    Returns:
        Parsed JSON response; callers expect it to contain 'tokens' and
        'values' keys (per the Neuronpedia activation API).

    Raises:
        requests.HTTPError: If the API returns a non-2xx status.
        requests.exceptions.Timeout: If the request exceeds `timeout`.
    """
    url = "https://www.neuronpedia.org/api/activation/new"
    data = {
        "feature": {
            "modelId": "gemma-2-2b",
            "layer": "0-gemmascope-mlp-16k",
            "index": str(feature_id)
        },
        "customText": text
    }
    response = requests.post(
        url,
        headers={"Content-Type": "application/json"},
        json=data,
        # requests has NO default timeout; without one a stalled server
        # blocks this call (and the 16k-feature sweep) forever.
        timeout=timeout
    )
    # Fail loudly on HTTP errors instead of json-parsing an error body.
    response.raise_for_status()
    return response.json()
def calculate_density(values: List[float], threshold: float = 0.5) -> float:
    """Return the fraction of `values` strictly greater than `threshold`.

    Args:
        values: Per-token activation values.
        threshold: Activation cutoff; values above it count as "active".

    Returns:
        Activation density in [0.0, 1.0]. An empty list yields 0.0
        instead of raising ZeroDivisionError (the API can return no
        values, and callers use `result.get('values', [])`).
    """
    if not values:
        return 0.0
    return sum(1 for v in values if v > threshold) / len(values)
def find_top_features_per_token(text: str, num_features: int = 5,
                                max_density: float = 0.01, batch_size: int = 100,
                                total_features: int = 16384) -> Dict:
    """Find the top sparse features for each token, filtered by density.

    Sweeps every feature in the layer, keeps only low-density (selective)
    features, and records those that fire (> 0.5) on each token.

    Args:
        text: Text to analyze.
        num_features: Max features to keep per token.
        max_density: Features whose activation density exceeds this are
            skipped as too unselective.
        batch_size: Retained for backward compatibility; the original
            "batching" loop was equivalent to a flat sweep, so this
            parameter has no effect.
        total_features: Number of features in the layer (16384 for the
            16k gemmascope SAE).

    Returns:
        Mapping of token string -> list of up to `num_features` dicts with
        'feature_id', 'activation', and 'density' keys, sorted by
        activation descending.
    """
    # One probe request just to learn the token list for this text.
    sample_activation = get_activation_values(text, 0)
    tokens = sample_activation['tokens']
    # NOTE(review): keying by token string collapses duplicate tokens —
    # repeated words share one entry. Confirm this is intended; keying by
    # position would change the return shape for existing callers.
    token_features = {token: [] for token in tokens}

    # Flat sweep: the original nested start_idx/feature_id loops were a
    # no-op batching structure identical to range(total_features).
    for feature_id in range(total_features):
        result = get_activation_values(text, feature_id)
        values = result.get('values', [])
        if not values:
            # Missing/empty values: nothing to score for this feature.
            continue
        density = calculate_density(values)
        if density > max_density:
            continue  # too unselective — fires on too many tokens
        for token, value in zip(tokens, values):
            if value > 0.5:  # Activation threshold
                token_features[token].append({
                    'feature_id': feature_id,
                    'activation': value,
                    'density': density
                })

    # Sort features for each token and keep top N by activation strength.
    for token in token_features:
        token_features[token].sort(key=lambda x: x['activation'], reverse=True)
        token_features[token] = token_features[token][:num_features]
    return token_features
# Demo: guarded so importing this module doesn't fire ~16k API requests.
if __name__ == "__main__":
    text = "Nike - Just Do It"
    token_features = find_top_features_per_token(text)

    # Print results, skipping tokens with no surviving low-density features.
    print(f"Text: {text}\n")
    for token, features in token_features.items():
        if features:  # Only show tokens with active features
            print(f"\nToken: {token}")
            for feat in features:
                print(f" Feature {feat['feature_id']}: activation={feat['activation']:.3f}, density={feat['density']:.3%}")