cyberandy committed
Commit 4a2f0f6 · verified · 1 Parent(s): 9f93b47

Update app.py

Files changed (1):
  1. app.py +51 -117
app.py CHANGED
@@ -1,56 +1,14 @@
  import gradio as gr
  import requests
- from typing import Dict, Tuple
+ from typing import Dict, Tuple, List

- # Custom CSS for Open Sans font and color theme
- css = """
- @import url('https://fonts.googleapis.com/css2?family=Open+Sans:wght@300;400;600;700&display=swap');
-
- body {
-     font-family: 'Open Sans', sans-serif !important;
- }
-
- .primary-btn {
-     background-color: #3452db !important;
- }
-
- .primary-btn:hover {
-     background-color: #2a41af !important;
- }
- """
-
- # Create custom theme with specific color
- theme = gr.themes.Soft(
-     primary_hue=gr.themes.colors.Color(
-         name="blue",
-         c50="#eef1ff",
-         c100="#e0e5ff",
-         c200="#c3cbff",
-         c300="#a5b2ff",
-         c400="#8798ff",
-         c500="#6a7eff",
-         c600="#3452db",  # Our main color
-         c700="#2a41af",
-         c800="#1f3183",
-         c900="#152156",
-         c950="#0a102b",
-     )
- )
-
- BRAND_EXAMPLES = [
-     "Nike - Just Do It. The power of determination.",
-     "Apple - Think Different. Innovation redefined.",
-     "McDonald's - I'm Lovin' It. Creating joy.",
-     "BMW - The Ultimate Driving Machine.",
-     "L'Oréal - Because You're Worth It."
+ SEMANTIC_CATEGORIES = [
+     "references to the term 'word' and its variations in different contexts",
+     "the start of a document",
+     "references to global events and organizations",
+     "elements related to search engine optimization (SEO)"
  ]

- def get_feature_url(feature_id: int, dashboard: bool = False) -> str:
-     base_url = f"https://www.neuronpedia.org/gemma-2-2b/0-gemmascope-mlp-16k/{feature_id}"
-     if dashboard:
-         return f"{base_url}?embed=true&embedexplanation=true&embedplots=true&embedtest=true&height=300"
-     return base_url
-
  def get_top_features(text: str, k: int = 5) -> Dict:
      url = "https://www.neuronpedia.org/api/search-with-topk"
      payload = {
@@ -70,53 +28,44 @@ def get_top_features(text: str, k: int = 5) -> Dict:
      )
      return response.json() if response.status_code == 200 else None

+ def categorize_features(features: List[Dict]) -> List[str]:
+     # This is a placeholder - in practice, you'd want to analyze the features
+     # and map them to semantic categories based on their activations
+     categories = []
+     for cat in SEMANTIC_CATEGORIES:
+         # Here you would check if the features match this category
+         if len(categories) < 5:  # Limit to top 5 categories
+             categories.append(cat)
+     return categories
+
  def format_output(data: Dict) -> Tuple[str, str, str]:
      if not data:
          return "Error analyzing text", "", ""
-
-     output = (
-         "# Neural Feature Analysis\n\n"
-         "*Analyzing neural patterns in the text using Gemma's interpretable features*\n\n"
-     )

-     # Format token-feature analysis
+     output = "# Semantic Analysis\n\n"
+     output += "*Analyzing semantic patterns in the text using Gemma's neural features*\n\n"
+
+     all_features = []
      for result in data['results']:
-         token = result['token']
-         if token == '<bos>':  # Skip BOS token
+         if result['token'] == '<bos>':
              continue
-
-         features = result['top_features']
-         if features:
-             output += f"\n## 🔍 Token: '{token}'\n"
-             for feat in features:
-                 feat_index = feat['feature_index']
-                 activation = feat['activation_value']
-                 feature_url = get_feature_url(feat_index)
-
-                 # Try to get feature info/label
-                 feature_info = ""
-                 if 'feature' in feat and isinstance(feat['feature'], dict):
-                     if 'name' in feat['feature']:
-                         feature_info = f" - {feat['feature']['name']}"
-                     elif 'description' in feat['feature']:
-                         feature_info = f" - {feat['feature']['description']}"
-
-                 output += f"### Feature [{feat_index}]({feature_url}){feature_info}\n"
-                 output += f"- **Activation Score:** {activation:.2f}\n"
-                 output += f"- [📊 View Analysis Dashboard]({feature_url})\n"
+         all_features.extend(result['top_features'])

-     # Get highest activation feature for dashboard
-     max_activation = 0
-     max_feature = None
+     # Sort all features by activation value
+     top_features = sorted(all_features, key=lambda x: x['activation_value'], reverse=True)[:5]

-     for result in data['results']:
-         for feature in result['top_features']:
-             if feature['activation_value'] > max_activation:
-                 max_activation = feature['activation_value']
-                 max_feature = feature['feature_index']
+     # Get semantic categories
+     categories = categorize_features(top_features)

-     if max_feature:
-         dashboard_url = get_feature_url(max_feature, dashboard=True)
+     # Format output with semantic categories
+     output += "## 🔍 Key Semantic Patterns\n\n"
+     for cat in categories:
+         output += f"• {cat}\n"
+
+     # Add feature dashboard for highest activation feature
+     if top_features:
+         max_feature = top_features[0]
+         dashboard_url = f"https://www.neuronpedia.org/gemma-2-2b/0-gemmascope-mlp-16k/{max_feature['feature_index']}?embed=true&embedexplanation=true&embedplots=true&embedtest=true&height=300"
          iframe = f'''
          <div style="border:1px solid #eee;border-radius:8px;padding:1px;background:#fff;">
              <iframe
@@ -128,51 +77,36 @@ def format_output(data: Dict) -> Tuple[str, str, str]:
              ></iframe>
          </div>
          '''
-         feature_label = f"📊 Feature {max_feature} Analysis (Peak Activation: {max_activation:.2f})"
+         feature_label = f"📊 Top Neural Pattern Analysis"
      else:
          iframe = ""
-         feature_label = "No significant features found"
+         feature_label = "No significant patterns found"

      return output, iframe, feature_label

  def create_interface():
-     with gr.Blocks(theme=theme, css=css) as interface:
-         gr.Markdown(
-             "# 🧠 Neural Feature Analyzer",
-             elem_classes="text-center"
-         )
-         gr.Markdown(
-             "*Analyze text using Gemma's interpretable neural features*\n\n"
-             "Shows top 5 most activated features for each token with density < 1%",
-             elem_classes="text-center"
-         )
+     with gr.Blocks() as interface:
+         gr.Markdown("# 🧠 Semantic Pattern Analyzer")
+         gr.Markdown("*Analyze semantic patterns in text using neural features*")

          with gr.Row():
              with gr.Column():
                  input_text = gr.Textbox(
                      lines=5,
-                     placeholder="Enter text to analyze...",
-                     label="Input Text",
-                     elem_classes="input-text"
-                 )
-                 analyze_btn = gr.Button(
-                     "🔍 Analyze Neural Features",
-                     variant="primary",
-                     elem_classes="primary-btn"
-                 )
-                 gr.Examples(
-                     BRAND_EXAMPLES,
-                     inputs=input_text,
-                     elem_classes="examples"
+                     placeholder="Enter text to analyze semantic patterns...",
+                     label="Input Text"
                  )
+                 analyze_btn = gr.Button("🔍 Analyze Patterns", variant="primary")
+                 gr.Examples([
+                     "WordLift is an AI-powered SEO tool that optimizes content",
+                     "This document outlines the global health initiatives",
+                     "Using key words and terms to improve search rankings"
+                 ], inputs=input_text)

              with gr.Column():
-                 output_text = gr.Markdown(elem_classes="output-text")
-                 feature_label = gr.Text(
-                     show_label=False,
-                     elem_classes="feature-label"
-                 )
-                 dashboard = gr.HTML(elem_classes="dashboard")
+                 output_text = gr.Markdown()
+                 feature_label = gr.Text(show_label=False)
+                 dashboard = gr.HTML()

          analyze_btn.click(
              fn=lambda text: format_output(get_top_features(text)),
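
The new `categorize_features` added in this commit is still a placeholder: it returns the first five entries of `SEMANTIC_CATEGORIES` no matter what features it receives. Below is a minimal sketch of one way the placeholder could be filled in. It is not part of this commit; it assumes each entry in `top_features` may carry a human-readable label under `feat['feature']['name']` or `feat['feature']['description']` (the same fields the previous version of `format_output` read), and it scores categories by simple keyword overlap weighted by activation.

```python
# Sketch only (not from the commit): score SEMANTIC_CATEGORIES against
# whatever labels the Neuronpedia features expose, weighted by activation.
from typing import Dict, List

# Mirrors the constant defined in app.py.
SEMANTIC_CATEGORIES = [
    "references to the term 'word' and its variations in different contexts",
    "the start of a document",
    "references to global events and organizations",
    "elements related to search engine optimization (SEO)",
]


def feature_label(feat: Dict) -> str:
    """Best-effort label for a feature; '' if none is present (an assumption,
    based on the 'name'/'description' fields the previous app.py checked)."""
    info = feat.get("feature")
    if isinstance(info, dict):
        return info.get("name") or info.get("description") or ""
    return ""


def categorize_features(features: List[Dict], max_categories: int = 5) -> List[str]:
    """Rank categories by crude keyword overlap with feature labels,
    weighting each match by the feature's activation value."""
    scores = {cat: 0.0 for cat in SEMANTIC_CATEGORIES}
    for feat in features:
        words = set(feature_label(feat).lower().split())
        if not words:
            continue  # skip features with no usable label
        activation = feat.get("activation_value", 0.0)
        for cat in SEMANTIC_CATEGORIES:
            overlap = len(words & set(cat.lower().split()))
            scores[cat] += overlap * activation
    ranked = sorted(SEMANTIC_CATEGORIES, key=lambda c: scores[c], reverse=True)
    return [cat for cat in ranked if scores[cat] > 0][:max_categories]
```

The bullet-list rendering in `format_output` works unchanged with whatever list a replacement like this returns, since it only iterates over the category strings.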