cyberandy committed on
Commit
dce4ae1
·
verified ·
1 Parent(s): 6465b33

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +50 -102
app.py CHANGED
@@ -1,25 +1,18 @@
1
  import gradio as gr
2
  import requests
3
- from typing import Dict, Tuple, List
4
- from operator import itemgetter
5
- from collections import Counter
6
  import logging
7
 
8
- # Set up logging
9
  logging.basicConfig(level=logging.INFO)
10
  logger = logging.getLogger(__name__)
11
 
12
- def get_features(text: str, k: int = 5) -> Dict:
13
- """Get neural features from the API with detailed logging."""
14
  url = "https://www.neuronpedia.org/api/search-with-topk"
15
  payload = {
16
  "modelId": "gemma-2-2b",
17
- "layer": "20-gemmascope-res-16k", # Updated to match website
18
- "sourceSet": "gemma-scope",
19
  "text": text,
20
- "k": k,
21
- "maxDensity": 0.01,
22
- "ignoreBos": True
23
  }
24
 
25
  try:
@@ -29,106 +22,61 @@ def get_features(text: str, k: int = 5) -> Dict:
29
  json=payload
30
  )
31
  response.raise_for_status()
32
- data = response.json()
33
-
34
- # Log the raw response for analysis
35
- logger.info(f"API Response: {data}")
36
-
37
- # Analyze feature distribution
38
- all_features = []
39
- feature_counter = Counter()
40
-
41
- for result in data['results']:
42
- token = result['token']
43
- logger.info(f"\nToken: {token}")
44
-
45
- for feature in result['top_features']:
46
- feature_id = feature['feature_index']
47
- activation = feature['activation_value']
48
- logger.info(f"Feature {feature_id}: {activation}")
49
-
50
- all_features.append({
51
- 'token': token,
52
- 'feature_id': feature_id,
53
- 'activation': activation,
54
- 'feature_data': feature.get('feature', {})
55
- })
56
- feature_counter[feature_id] += 1
57
-
58
- # Log feature frequency analysis
59
- logger.info("\nFeature Frequencies:")
60
- for feature_id, count in feature_counter.most_common():
61
- logger.info(f"Feature {feature_id}: {count} occurrences")
62
-
63
- return data, all_features, feature_counter
64
-
65
  except Exception as e:
66
  logger.error(f"Error in API call: {str(e)}")
67
- return None, [], Counter()
68
 
69
  def format_output(text: str) -> Tuple[str, str, str]:
70
- data, all_features, feature_counter = get_features(text)
71
 
72
- if not data:
73
  return "Error analyzing text", "", ""
74
-
75
- # Sort features by frequency first, then by maximum activation within each feature
76
- feature_activations = {}
77
- for feature in all_features:
78
- feature_id = feature['feature_id']
79
- activation = feature['activation']
80
- if feature_id not in feature_activations or activation > feature_activations[feature_id]['activation']:
81
- feature_activations[feature_id] = feature
82
-
83
- # Get top features by frequency, then sort by activation
84
- most_common_features = [
85
- feature_activations[feature_id]
86
- for feature_id, _ in feature_counter.most_common()
87
- ]
88
-
89
- # Sort by activation within the most common features
90
- top_features = sorted(most_common_features, key=itemgetter('activation'), reverse=True)[:5]
91
-
92
- # Format output
93
  output = "# Neural Feature Analysis\n\n"
94
- output += "## Top 5 Most Active Features\n\n"
95
 
96
- for idx, feat in enumerate(top_features, 1):
97
- feature_url = f"https://www.neuronpedia.org/gemma-2-2b/20-gemmascope-res-16k/{feat['feature_id']}"
98
-
99
- feature_info = ""
100
- if 'name' in feat['feature_data']:
101
- feature_info = f" - {feat['feature_data']['name']}"
102
- elif 'description' in feat['feature_data']:
103
- feature_info = f" - {feat['feature_data']['description']}"
104
 
105
- output += f"### {idx}. Feature {feat['feature_id']}{feature_info}\n"
106
- output += f"- **Token:** '{feat['token']}'\n"
107
- output += f"- **Activation:** {feat['activation']:.2f}\n"
108
- output += f"- **Frequency:** {feature_counter[feat['feature_id']]} occurrences\n"
109
- output += f"- [View on Neuronpedia]({feature_url})\n\n"
 
 
110
 
111
- # Use highest activation feature for dashboard
112
- if top_features:
113
- top_feature = top_features[0]
114
- dashboard_url = f"https://www.neuronpedia.org/gemma-2-2b/20-gemmascope-res-16k/{top_feature['feature_id']}?embed=true&embedexplanation=true&embedplots=true&embedtest=true&height=300"
115
- iframe = f'''
116
- <div style="border:1px solid #eee;border-radius:8px;padding:1px;background:#fff;">
117
- <iframe
118
- src="{dashboard_url}"
119
- width="100%"
120
- height="600px"
121
- frameborder="0"
122
- style="border-radius:8px;"
123
- ></iframe>
124
- </div>
125
- '''
126
- feature_label = f"Feature {top_feature['feature_id']} Dashboard (Activation: {top_feature['activation']:.2f})"
127
- else:
128
- iframe = ""
129
- feature_label = "No significant features found"
 
 
 
 
 
130
 
131
- return output, iframe, feature_label
132
 
133
  def create_interface():
134
  with gr.Blocks() as interface:
@@ -145,8 +93,8 @@ def create_interface():
145
  analyze_btn = gr.Button("Analyze Features", variant="primary")
146
  gr.Examples([
147
  "WordLift",
148
- "Nike - Just Do It. The power of determination.",
149
- "Apple - Think Different. Innovation redefined.",
150
  ], inputs=input_text)
151
 
152
  with gr.Column():
 
1
  import gradio as gr
2
  import requests
3
+ from typing import Dict, Tuple
 
 
4
  import logging
5
 
 
6
  logging.basicConfig(level=logging.INFO)
7
  logger = logging.getLogger(__name__)
8
 
9
+ def get_features(text: str) -> Dict:
10
+ """Get neural features from the API using the exact website parameters."""
11
  url = "https://www.neuronpedia.org/api/search-with-topk"
12
  payload = {
13
  "modelId": "gemma-2-2b",
 
 
14
  "text": text,
15
+ "layer": "20-gemmascope-res-16k"
 
 
16
  }
17
 
18
  try:
 
22
  json=payload
23
  )
24
  response.raise_for_status()
25
+ return response.json()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  except Exception as e:
27
  logger.error(f"Error in API call: {str(e)}")
28
+ return None
29
 
30
  def format_output(text: str) -> Tuple[str, str, str]:
31
+ data = get_features(text)
32
 
33
+ if not data or 'results' not in data:
34
  return "Error analyzing text", "", ""
35
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  output = "# Neural Feature Analysis\n\n"
 
37
 
38
+ # Process each token's features
39
+ for result in data['results']:
40
+ token = result['token']
41
+ if token == '<bos>':
42
+ continue
43
+
44
+ output += f"## Token: {token}\n\n"
 
45
 
46
+ for feature in result['top_features']:
47
+ feature_id = feature['feature_index']
48
+ activation = feature['activation_value']
49
+ feature_url = f"https://www.neuronpedia.org/gemma-2-2b/20-gemmascope-res-16k/{feature_id}"
50
+
51
+ output += f"- Feature {feature_id} (Activation: {activation:.2f})\n"
52
+ output += f" [View on Neuronpedia]({feature_url})\n\n"
53
 
54
+ # Use first non-BOS token's top feature for the dashboard
55
+ dashboard_html = ""
56
+ feature_label = "No features found"
57
+
58
+ for result in data['results']:
59
+ if result['token'] != '<bos>' and result['top_features']:
60
+ top_feature = result['top_features'][0]
61
+ feature_id = top_feature['feature_index']
62
+ activation = top_feature['activation_value']
63
+
64
+ dashboard_url = f"https://www.neuronpedia.org/gemma-2-2b/20-gemmascope-res-16k/{feature_id}?embed=true&embedexplanation=true&embedplots=true&embedtest=true&height=300"
65
+ dashboard_html = f'''
66
+ <div style="border:1px solid #eee;border-radius:8px;padding:1px;background:#fff;">
67
+ <iframe
68
+ src="{dashboard_url}"
69
+ width="100%"
70
+ height="600px"
71
+ frameborder="0"
72
+ style="border-radius:8px;"
73
+ ></iframe>
74
+ </div>
75
+ '''
76
+ feature_label = f"Feature {feature_id} Dashboard (Activation: {activation:.2f})"
77
+ break
78
 
79
+ return output, dashboard_html, feature_label
80
 
81
  def create_interface():
82
  with gr.Blocks() as interface:
 
93
  analyze_btn = gr.Button("Analyze Features", variant="primary")
94
  gr.Examples([
95
  "WordLift",
96
+ "Think Different",
97
+ "Just Do It"
98
  ], inputs=input_text)
99
 
100
  with gr.Column():