Spaces:

WordLift
/

brand-llms

Running

App Files Community

cyberandy committed on Dec 2, 2024

Commit

a24593e

verified ·

1 Parent(s): 4a2f0f6

Update app.py

Browse files

Files changed (1) hide show

app.py +43 -42

app.py CHANGED Viewed

@@ -1,13 +1,7 @@
 import gradio as gr
 import requests
 from typing import Dict, Tuple, List
-SEMANTIC_CATEGORIES = [
-    "references to the term 'word' and its variations in different contexts",
-    "the start of a document",
-    "references to global events and organizations",
-    "elements related to search engine optimization (SEO)"
-]
 def get_top_features(text: str, k: int = 5) -> Dict:
     url = "https://www.neuronpedia.org/api/search-with-topk"
@@ -28,44 +22,51 @@ def get_top_features(text: str, k: int = 5) -> Dict:
     )
     return response.json() if response.status_code == 200 else None
-def categorize_features(features: List[Dict]) -> List[str]:
-    # This is a placeholder - in practice, you'd want to analyze the features
-    # and map them to semantic categories based on their activations
-    categories = []
-    for cat in SEMANTIC_CATEGORIES:
-        # Here you would check if the features match this category
-        if len(categories) < 5:  # Limit to top 5 categories
-            categories.append(cat)
-    return categories
 def format_output(data: Dict) -> Tuple[str, str, str]:
     if not data:
         return "Error analyzing text", "", ""
-    output = "# Semantic Analysis\n\n"
-    output += "*Analyzing semantic patterns in the text using Gemma's neural features*\n\n"
     all_features = []
     for result in data['results']:
-        if result['token'] == '<bos>':
             continue
-        all_features.extend(result['top_features'])
-    # Sort all features by activation value
-    top_features = sorted(all_features, key=lambda x: x['activation_value'], reverse=True)[:5]
-    # Get semantic categories
-    categories = categorize_features(top_features)
-    # Format output with semantic categories
-    output += "## 🔍 Key Semantic Patterns\n\n"
-    for cat in categories:
-        output += f"• {cat}\n"
-    # Add feature dashboard for highest activation feature
     if top_features:
-        max_feature = top_features[0]
-        dashboard_url = f"https://www.neuronpedia.org/gemma-2-2b/0-gemmascope-mlp-16k/{max_feature['feature_index']}?embed=true&embedexplanation=true&embedplots=true&embedtest=true&height=300"
         iframe = f'''
             <div style="border:1px solid #eee;border-radius:8px;padding:1px;background:#fff;">
                 <iframe
@@ -77,30 +78,30 @@ def format_output(data: Dict) -> Tuple[str, str, str]:
                 ></iframe>
             </div>
         '''
-        feature_label = f"📊 Top Neural Pattern Analysis"
     else:
         iframe = ""
-        feature_label = "No significant patterns found"
     return output, iframe, feature_label
 def create_interface():
     with gr.Blocks() as interface:
-        gr.Markdown("# 🧠 Semantic Pattern Analyzer")
-        gr.Markdown("*Analyze semantic patterns in text using neural features*")
         with gr.Row():
             with gr.Column():
                 input_text = gr.Textbox(
                     lines=5,
-                    placeholder="Enter text to analyze semantic patterns...",
                     label="Input Text"
                 )
-                analyze_btn = gr.Button("🔍 Analyze Patterns", variant="primary")
                 gr.Examples([
-                    "WordLift is an AI-powered SEO tool that optimizes content",
-                    "This document outlines the global health initiatives",
-                    "Using key words and terms to improve search rankings"
                 ], inputs=input_text)
             with gr.Column():

 import gradio as gr
 import requests
 from typing import Dict, Tuple, List
+from operator import itemgetter
 def get_top_features(text: str, k: int = 5) -> Dict:
     url = "https://www.neuronpedia.org/api/search-with-topk"
     )
     return response.json() if response.status_code == 200 else None
 def format_output(data: Dict) -> Tuple[str, str, str]:
     if not data:
         return "Error analyzing text", "", ""
+    # Collect all features from all tokens
     all_features = []
     for result in data['results']:
+        token = result['token']
+        if token == '<bos>':
             continue
+        for feature in result['top_features']:
+            all_features.append({
+                'token': token,
+                'feature_id': feature['feature_index'],
+                'activation': feature['activation_value'],
+                'feature_data': feature.get('feature', {})
+            })
+    # Sort all features by activation value and get top 5
+    top_features = sorted(all_features, key=itemgetter('activation'), reverse=True)[:5]
+    # Format output
+    output = "# Neural Feature Analysis\n\n"
+    output += "## Top 5 Most Active Features\n\n"
+    for idx, feat in enumerate(top_features, 1):
+        feature_url = f"https://www.neuronpedia.org/gemma-2-2b/0-gemmascope-mlp-16k/{feat['feature_id']}"
+        # Try to get feature name/description if available
+        feature_info = ""
+        if 'name' in feat['feature_data']:
+            feature_info = f" - {feat['feature_data']['name']}"
+        elif 'description' in feat['feature_data']:
+            feature_info = f" - {feat['feature_data']['description']}"
+        output += f"### {idx}. Feature {feat['feature_id']}{feature_info}\n"
+        output += f"- **Token:** '{feat['token']}'\n"
+        output += f"- **Activation:** {feat['activation']:.2f}\n"
+        output += f"- [View on Neuronpedia]({feature_url})\n\n"
+    # Use highest activation feature for dashboard
     if top_features:
+        top_feature = top_features[0]
+        dashboard_url = f"https://www.neuronpedia.org/gemma-2-2b/0-gemmascope-mlp-16k/{top_feature['feature_id']}?embed=true&embedexplanation=true&embedplots=true&embedtest=true&height=300"
         iframe = f'''
             <div style="border:1px solid #eee;border-radius:8px;padding:1px;background:#fff;">
                 <iframe
                 ></iframe>
             </div>
         '''
+        feature_label = f"Feature {top_feature['feature_id']} Dashboard (Activation: {top_feature['activation']:.2f})"
     else:
         iframe = ""
+        feature_label = "No significant features found"
     return output, iframe, feature_label
 def create_interface():
     with gr.Blocks() as interface:
+        gr.Markdown("# Neural Feature Analyzer")
+        gr.Markdown("*Analyze text using Gemma's interpretable neural features*")
         with gr.Row():
             with gr.Column():
                 input_text = gr.Textbox(
                     lines=5,
+                    placeholder="Enter text to analyze...",
                     label="Input Text"
                 )
+                analyze_btn = gr.Button("Analyze Features", variant="primary")
                 gr.Examples([
+                    "Nike - Just Do It. The power of determination.",
+                    "Apple - Think Different. Innovation redefined.",
+                    "McDonald's - I'm Lovin' It. Creating joy.",
                 ], inputs=input_text)
             with gr.Column():