Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,13 +1,7 @@
|
|
1 |
import gradio as gr
|
2 |
import requests
|
3 |
from typing import Dict, Tuple, List
|
4 |
-
|
5 |
-
SEMANTIC_CATEGORIES = [
|
6 |
-
"references to the term 'word' and its variations in different contexts",
|
7 |
-
"the start of a document",
|
8 |
-
"references to global events and organizations",
|
9 |
-
"elements related to search engine optimization (SEO)"
|
10 |
-
]
|
11 |
|
12 |
def get_top_features(text: str, k: int = 5) -> Dict:
|
13 |
url = "https://www.neuronpedia.org/api/search-with-topk"
|
@@ -28,44 +22,51 @@ def get_top_features(text: str, k: int = 5) -> Dict:
|
|
28 |
)
|
29 |
return response.json() if response.status_code == 200 else None
|
30 |
|
31 |
-
def categorize_features(features: List[Dict]) -> List[str]:
|
32 |
-
# This is a placeholder - in practice, you'd want to analyze the features
|
33 |
-
# and map them to semantic categories based on their activations
|
34 |
-
categories = []
|
35 |
-
for cat in SEMANTIC_CATEGORIES:
|
36 |
-
# Here you would check if the features match this category
|
37 |
-
if len(categories) < 5: # Limit to top 5 categories
|
38 |
-
categories.append(cat)
|
39 |
-
return categories
|
40 |
-
|
41 |
def format_output(data: Dict) -> Tuple[str, str, str]:
|
42 |
if not data:
|
43 |
return "Error analyzing text", "", ""
|
44 |
|
45 |
-
|
46 |
-
output += "*Analyzing semantic patterns in the text using Gemma's neural features*\n\n"
|
47 |
-
|
48 |
all_features = []
|
49 |
for result in data['results']:
|
50 |
-
|
|
|
51 |
continue
|
52 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
53 |
|
54 |
-
# Sort all features by activation value
|
55 |
-
top_features = sorted(all_features, key=
|
56 |
|
57 |
-
#
|
58 |
-
|
|
|
59 |
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
64 |
|
65 |
-
#
|
66 |
if top_features:
|
67 |
-
|
68 |
-
dashboard_url = f"https://www.neuronpedia.org/gemma-2-2b/0-gemmascope-mlp-16k/{
|
69 |
iframe = f'''
|
70 |
<div style="border:1px solid #eee;border-radius:8px;padding:1px;background:#fff;">
|
71 |
<iframe
|
@@ -77,30 +78,30 @@ def format_output(data: Dict) -> Tuple[str, str, str]:
|
|
77 |
></iframe>
|
78 |
</div>
|
79 |
'''
|
80 |
-
feature_label = f"
|
81 |
else:
|
82 |
iframe = ""
|
83 |
-
feature_label = "No significant
|
84 |
|
85 |
return output, iframe, feature_label
|
86 |
|
87 |
def create_interface():
|
88 |
with gr.Blocks() as interface:
|
89 |
-
gr.Markdown("#
|
90 |
-
gr.Markdown("*Analyze
|
91 |
|
92 |
with gr.Row():
|
93 |
with gr.Column():
|
94 |
input_text = gr.Textbox(
|
95 |
lines=5,
|
96 |
-
placeholder="Enter text to analyze
|
97 |
label="Input Text"
|
98 |
)
|
99 |
-
analyze_btn = gr.Button("
|
100 |
gr.Examples([
|
101 |
-
"
|
102 |
-
"
|
103 |
-
"
|
104 |
], inputs=input_text)
|
105 |
|
106 |
with gr.Column():
|
|
|
1 |
import gradio as gr
|
2 |
import requests
|
3 |
from typing import Dict, Tuple, List
|
4 |
+
from operator import itemgetter
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
|
6 |
def get_top_features(text: str, k: int = 5) -> Dict:
|
7 |
url = "https://www.neuronpedia.org/api/search-with-topk"
|
|
|
22 |
)
|
23 |
return response.json() if response.status_code == 200 else None
|
24 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
25 |
def format_output(data: Dict) -> Tuple[str, str, str]:
|
26 |
if not data:
|
27 |
return "Error analyzing text", "", ""
|
28 |
|
29 |
+
# Collect all features from all tokens
|
|
|
|
|
30 |
all_features = []
|
31 |
for result in data['results']:
|
32 |
+
token = result['token']
|
33 |
+
if token == '<bos>':
|
34 |
continue
|
35 |
+
|
36 |
+
for feature in result['top_features']:
|
37 |
+
all_features.append({
|
38 |
+
'token': token,
|
39 |
+
'feature_id': feature['feature_index'],
|
40 |
+
'activation': feature['activation_value'],
|
41 |
+
'feature_data': feature.get('feature', {})
|
42 |
+
})
|
43 |
|
44 |
+
# Sort all features by activation value and get top 5
|
45 |
+
top_features = sorted(all_features, key=itemgetter('activation'), reverse=True)[:5]
|
46 |
|
47 |
+
# Format output
|
48 |
+
output = "# Neural Feature Analysis\n\n"
|
49 |
+
output += "## Top 5 Most Active Features\n\n"
|
50 |
|
51 |
+
for idx, feat in enumerate(top_features, 1):
|
52 |
+
feature_url = f"https://www.neuronpedia.org/gemma-2-2b/0-gemmascope-mlp-16k/{feat['feature_id']}"
|
53 |
+
|
54 |
+
# Try to get feature name/description if available
|
55 |
+
feature_info = ""
|
56 |
+
if 'name' in feat['feature_data']:
|
57 |
+
feature_info = f" - {feat['feature_data']['name']}"
|
58 |
+
elif 'description' in feat['feature_data']:
|
59 |
+
feature_info = f" - {feat['feature_data']['description']}"
|
60 |
+
|
61 |
+
output += f"### {idx}. Feature {feat['feature_id']}{feature_info}\n"
|
62 |
+
output += f"- **Token:** '{feat['token']}'\n"
|
63 |
+
output += f"- **Activation:** {feat['activation']:.2f}\n"
|
64 |
+
output += f"- [View on Neuronpedia]({feature_url})\n\n"
|
65 |
|
66 |
+
# Use highest activation feature for dashboard
|
67 |
if top_features:
|
68 |
+
top_feature = top_features[0]
|
69 |
+
dashboard_url = f"https://www.neuronpedia.org/gemma-2-2b/0-gemmascope-mlp-16k/{top_feature['feature_id']}?embed=true&embedexplanation=true&embedplots=true&embedtest=true&height=300"
|
70 |
iframe = f'''
|
71 |
<div style="border:1px solid #eee;border-radius:8px;padding:1px;background:#fff;">
|
72 |
<iframe
|
|
|
78 |
></iframe>
|
79 |
</div>
|
80 |
'''
|
81 |
+
feature_label = f"Feature {top_feature['feature_id']} Dashboard (Activation: {top_feature['activation']:.2f})"
|
82 |
else:
|
83 |
iframe = ""
|
84 |
+
feature_label = "No significant features found"
|
85 |
|
86 |
return output, iframe, feature_label
|
87 |
|
88 |
def create_interface():
|
89 |
with gr.Blocks() as interface:
|
90 |
+
gr.Markdown("# Neural Feature Analyzer")
|
91 |
+
gr.Markdown("*Analyze text using Gemma's interpretable neural features*")
|
92 |
|
93 |
with gr.Row():
|
94 |
with gr.Column():
|
95 |
input_text = gr.Textbox(
|
96 |
lines=5,
|
97 |
+
placeholder="Enter text to analyze...",
|
98 |
label="Input Text"
|
99 |
)
|
100 |
+
analyze_btn = gr.Button("Analyze Features", variant="primary")
|
101 |
gr.Examples([
|
102 |
+
"Nike - Just Do It. The power of determination.",
|
103 |
+
"Apple - Think Different. Innovation redefined.",
|
104 |
+
"McDonald's - I'm Lovin' It. Creating joy.",
|
105 |
], inputs=input_text)
|
106 |
|
107 |
with gr.Column():
|