cyberandy committed on
Commit
a24593e
·
verified ·
1 Parent(s): 4a2f0f6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -42
app.py CHANGED
@@ -1,13 +1,7 @@
1
  import gradio as gr
2
  import requests
3
  from typing import Dict, Tuple, List
4
-
5
- SEMANTIC_CATEGORIES = [
6
- "references to the term 'word' and its variations in different contexts",
7
- "the start of a document",
8
- "references to global events and organizations",
9
- "elements related to search engine optimization (SEO)"
10
- ]
11
 
12
  def get_top_features(text: str, k: int = 5) -> Dict:
13
  url = "https://www.neuronpedia.org/api/search-with-topk"
@@ -28,44 +22,51 @@ def get_top_features(text: str, k: int = 5) -> Dict:
28
  )
29
  return response.json() if response.status_code == 200 else None
30
 
31
- def categorize_features(features: List[Dict]) -> List[str]:
32
- # This is a placeholder - in practice, you'd want to analyze the features
33
- # and map them to semantic categories based on their activations
34
- categories = []
35
- for cat in SEMANTIC_CATEGORIES:
36
- # Here you would check if the features match this category
37
- if len(categories) < 5: # Limit to top 5 categories
38
- categories.append(cat)
39
- return categories
40
-
41
  def format_output(data: Dict) -> Tuple[str, str, str]:
42
  if not data:
43
  return "Error analyzing text", "", ""
44
 
45
- output = "# Semantic Analysis\n\n"
46
- output += "*Analyzing semantic patterns in the text using Gemma's neural features*\n\n"
47
-
48
  all_features = []
49
  for result in data['results']:
50
- if result['token'] == '<bos>':
 
51
  continue
52
- all_features.extend(result['top_features'])
 
 
 
 
 
 
 
53
 
54
- # Sort all features by activation value
55
- top_features = sorted(all_features, key=lambda x: x['activation_value'], reverse=True)[:5]
56
 
57
- # Get semantic categories
58
- categories = categorize_features(top_features)
 
59
 
60
- # Format output with semantic categories
61
- output += "## πŸ” Key Semantic Patterns\n\n"
62
- for cat in categories:
63
- output += f"β€’ {cat}\n"
 
 
 
 
 
 
 
 
 
 
64
 
65
- # Add feature dashboard for highest activation feature
66
  if top_features:
67
- max_feature = top_features[0]
68
- dashboard_url = f"https://www.neuronpedia.org/gemma-2-2b/0-gemmascope-mlp-16k/{max_feature['feature_index']}?embed=true&embedexplanation=true&embedplots=true&embedtest=true&height=300"
69
  iframe = f'''
70
  <div style="border:1px solid #eee;border-radius:8px;padding:1px;background:#fff;">
71
  <iframe
@@ -77,30 +78,30 @@ def format_output(data: Dict) -> Tuple[str, str, str]:
77
  ></iframe>
78
  </div>
79
  '''
80
- feature_label = f"πŸ“Š Top Neural Pattern Analysis"
81
  else:
82
  iframe = ""
83
- feature_label = "No significant patterns found"
84
 
85
  return output, iframe, feature_label
86
 
87
  def create_interface():
88
  with gr.Blocks() as interface:
89
- gr.Markdown("# 🧠 Semantic Pattern Analyzer")
90
- gr.Markdown("*Analyze semantic patterns in text using neural features*")
91
 
92
  with gr.Row():
93
  with gr.Column():
94
  input_text = gr.Textbox(
95
  lines=5,
96
- placeholder="Enter text to analyze semantic patterns...",
97
  label="Input Text"
98
  )
99
- analyze_btn = gr.Button("πŸ” Analyze Patterns", variant="primary")
100
  gr.Examples([
101
- "WordLift is an AI-powered SEO tool that optimizes content",
102
- "This document outlines the global health initiatives",
103
- "Using key words and terms to improve search rankings"
104
  ], inputs=input_text)
105
 
106
  with gr.Column():
 
1
  import gradio as gr
2
  import requests
3
  from typing import Dict, Tuple, List
4
+ from operator import itemgetter
 
 
 
 
 
 
5
 
6
  def get_top_features(text: str, k: int = 5) -> Dict:
7
  url = "https://www.neuronpedia.org/api/search-with-topk"
 
22
  )
23
  return response.json() if response.status_code == 200 else None
24
 
 
 
 
 
 
 
 
 
 
 
25
  def format_output(data: Dict) -> Tuple[str, str, str]:
26
  if not data:
27
  return "Error analyzing text", "", ""
28
 
29
+ # Collect all features from all tokens
 
 
30
  all_features = []
31
  for result in data['results']:
32
+ token = result['token']
33
+ if token == '<bos>':
34
  continue
35
+
36
+ for feature in result['top_features']:
37
+ all_features.append({
38
+ 'token': token,
39
+ 'feature_id': feature['feature_index'],
40
+ 'activation': feature['activation_value'],
41
+ 'feature_data': feature.get('feature', {})
42
+ })
43
 
44
+ # Sort all features by activation value and get top 5
45
+ top_features = sorted(all_features, key=itemgetter('activation'), reverse=True)[:5]
46
 
47
+ # Format output
48
+ output = "# Neural Feature Analysis\n\n"
49
+ output += "## Top 5 Most Active Features\n\n"
50
 
51
+ for idx, feat in enumerate(top_features, 1):
52
+ feature_url = f"https://www.neuronpedia.org/gemma-2-2b/0-gemmascope-mlp-16k/{feat['feature_id']}"
53
+
54
+ # Try to get feature name/description if available
55
+ feature_info = ""
56
+ if 'name' in feat['feature_data']:
57
+ feature_info = f" - {feat['feature_data']['name']}"
58
+ elif 'description' in feat['feature_data']:
59
+ feature_info = f" - {feat['feature_data']['description']}"
60
+
61
+ output += f"### {idx}. Feature {feat['feature_id']}{feature_info}\n"
62
+ output += f"- **Token:** '{feat['token']}'\n"
63
+ output += f"- **Activation:** {feat['activation']:.2f}\n"
64
+ output += f"- [View on Neuronpedia]({feature_url})\n\n"
65
 
66
+ # Use highest activation feature for dashboard
67
  if top_features:
68
+ top_feature = top_features[0]
69
+ dashboard_url = f"https://www.neuronpedia.org/gemma-2-2b/0-gemmascope-mlp-16k/{top_feature['feature_id']}?embed=true&embedexplanation=true&embedplots=true&embedtest=true&height=300"
70
  iframe = f'''
71
  <div style="border:1px solid #eee;border-radius:8px;padding:1px;background:#fff;">
72
  <iframe
 
78
  ></iframe>
79
  </div>
80
  '''
81
+ feature_label = f"Feature {top_feature['feature_id']} Dashboard (Activation: {top_feature['activation']:.2f})"
82
  else:
83
  iframe = ""
84
+ feature_label = "No significant features found"
85
 
86
  return output, iframe, feature_label
87
 
88
  def create_interface():
89
  with gr.Blocks() as interface:
90
+ gr.Markdown("# Neural Feature Analyzer")
91
+ gr.Markdown("*Analyze text using Gemma's interpretable neural features*")
92
 
93
  with gr.Row():
94
  with gr.Column():
95
  input_text = gr.Textbox(
96
  lines=5,
97
+ placeholder="Enter text to analyze...",
98
  label="Input Text"
99
  )
100
+ analyze_btn = gr.Button("Analyze Features", variant="primary")
101
  gr.Examples([
102
+ "Nike - Just Do It. The power of determination.",
103
+ "Apple - Think Different. Innovation redefined.",
104
+ "McDonald's - I'm Lovin' It. Creating joy.",
105
  ], inputs=input_text)
106
 
107
  with gr.Column():