Spaces:

PromptMeister
/

keyword-DNA-analyzer

Sleeping

App Files Files Community

PromptMeister committed on May 31

Commit

c2f1156

verified ·

1 Parent(s): cd53598

Update app.py

Browse files

Added the voice function back, as it was left out when the AI Snipper colors were added.

Files changed (1) hide show

app.py +970 -241

app.py CHANGED Viewed

@@ -1,4 +1,3 @@
-# Your existing imports remain the same
 import gradio as gr
 import numpy as np
 import pandas as pd
@@ -143,53 +142,6 @@ ai_snipper_css = """
     color: var(--text-primary) !important;
 }
-/* File upload areas */
-.gr-file-upload {
-    background: var(--bg-card) !important;
-    border: 2px dashed var(--border-accent) !important;
-    border-radius: 16px !important;
-    color: var(--text-secondary) !important;
-    transition: all 0.3s ease !important;
-}
-.gr-file-upload:hover {
-    border-color: var(--ai-cyan) !important;
-    background: var(--bg-card-hover) !important;
-}
-/* Audio input */
-.gr-audio {
-    background: var(--gradient-card) !important;
-    border: 1px solid var(--border-primary) !important;
-    border-radius: 12px !important;
-}
-/* Sliders */
-.gr-slider input[type="range"] {
-    background: var(--bg-secondary) !important;
-}
-.gr-slider input[type="range"]::-webkit-slider-track {
-    background: var(--bg-secondary) !important;
-    border-radius: 6px !important;
-}
-.gr-slider input[type="range"]::-webkit-slider-thumb {
-    background: var(--gradient-button) !important;
-    border: none !important;
-    border-radius: 50% !important;
-    box-shadow: 0 2px 4px rgba(0, 0, 0, 0.2) !important;
-}
-/* Radio buttons and checkboxes */
-.gr-radio input[type="radio"] {
-    accent-color: var(--ai-cyan) !important;
-}
-.gr-checkbox input[type="checkbox"] {
-    accent-color: var(--ai-cyan) !important;
-}
 /* Tabs */
 .gr-tab-nav {
     background: var(--gradient-card) !important;
@@ -214,215 +166,995 @@ ai_snipper_css = """
     box-shadow: 0 2px 4px rgba(6, 182, 212, 0.3) !important;
 }
-.gr-tab-nav button:hover:not(.selected) {
-    background: var(--bg-card-hover) !important;
-    color: var(--text-primary) !important;
-}
-/* Tab content */
-.gr-tabitem {
     background: var(--gradient-card) !important;
     border: 1px solid var(--border-primary) !important;
     border-radius: 12px !important;
-    padding: 1.5rem !important;
-    margin-top: 1rem !important;
 }
-/* Progress bars */
-.gr-progress {
-    background: var(--bg-secondary) !important;
-    border-radius: 6px !important;
-}
-.gr-progress-bar {
     background: var(--gradient-button) !important;
-    border-radius: 6px !important;
-}
-/* Accordion */
-.gr-accordion {
-    background: var(--gradient-card) !important;
-    border: 1px solid var(--border-primary) !important;
-    border-radius: 12px !important;
-}
-.gr-accordion summary {
-    background: var(--bg-card) !important;
-    color: var(--text-primary) !important;
-    padding: 1rem !important;
-    border-radius: 12px !important;
-    cursor: pointer !important;
-    font-weight: 600 !important;
-}
-.gr-accordion[open] summary {
-    border-bottom: 1px solid var(--border-primary) !important;
-    border-radius: 12px 12px 0 0 !important;
-}
-/* JSON output */
-.gr-json {
-    background: var(--bg-secondary) !important;
-    border: 1px solid var(--border-primary) !important;
-    border-radius: 12px !important;
-    color: var(--text-primary) !important;
-}
-/* HTML output areas */
-.gr-html {
-    background: var(--gradient-card) !important;
-    border: 1px solid var(--border-primary) !important;
-    border-radius: 12px !important;
-    padding: 1rem !important;
 }
-/* Plot containers */
-.gr-plot {
-    background: var(--gradient-card) !important;
-    border: 1px solid var(--border-primary) !important;
-    border-radius: 12px !important;
-    padding: 1rem !important;
 }
-/* Rows and columns */
-.gr-row {
-    gap: 1.5rem !important;
 }
-.gr-column {
-    gap: 1rem !important;
-}
-/* Scrollbars */
-::-webkit-scrollbar {
-    width: 8px;
-    height: 8px;
-}
-::-webkit-scrollbar-track {
-    background: var(--bg-secondary);
-    border-radius: 4px;
-}
-::-webkit-scrollbar-thumb {
-    background: var(--gradient-button);
-    border-radius: 4px;
-}
-::-webkit-scrollbar-thumb:hover {
-    background: var(--ai-cyan);
-}
-/* Custom DNA-themed elements */
-.dna-header {
-    position: relative;
-    text-align: center;
-    padding: 2rem 0;
-    margin-bottom: 2rem;
-}
-.dna-header::before {
-    content: '';
-    position: absolute;
-    top: 0;
-    left: 50%;
-    transform: translateX(-50%);
-    width: 100px;
-    height: 4px;
-    background: var(--gradient-primary);
-    border-radius: 2px;
-}
-.dna-subtitle {
-    color: var(--text-muted) !important;
-    font-size: 1.2rem !important;
-    margin-top: 1rem !important;
-    font-weight: 400 !important;
-}
-/* Example button styling */
-.example-buttons .gr-button {
-    background: var(--bg-card) !important;
-    color: var(--text-accent) !important;
-    border: 1px solid var(--border-accent) !important;
-    font-size: 0.875rem !important;
-    padding: 0.5rem 1rem !important;
-}
-.example-buttons .gr-button:hover {
-    background: var(--gradient-button) !important;
-    color: var(--text-primary) !important;
-    border-color: transparent !important;
-}
-/* Status messages */
-.status-message {
-    text-align: center !important;
-    padding: 1rem !important;
-    border-radius: 8px !important;
-    margin: 1rem 0 !important;
-    font-weight: 500 !important;
-}
-.status-loading {
-    background: rgba(6, 182, 212, 0.1) !important;
-    border: 1px solid var(--border-accent) !important;
-    color: var(--text-accent) !important;
-}
-.status-success {
-    background: rgba(20, 184, 166, 0.1) !important;
-    border: 1px solid var(--ai-teal) !important;
-    color: var(--ai-teal) !important;
-}
-.status-error {
-    background: rgba(239, 68, 68, 0.1) !important;
-    border: 1px solid #ef4444 !important;
-    color: #ef4444 !important;
-}
-/* Footer hiding */
-footer {
-    visibility: hidden !important;
-}
-/* Mobile responsiveness */
-@media (max-width: 768px) {
-    .gradio-container h1 {
-        font-size: 2rem !important;
-    }
-    .gr-button {
-        width: 100% !important;
-        justify-content: center !important;
-    }
-    .gr-row {
-        flex-direction: column !important;
-    }
-}
-"""
-# Keep all your existing function code exactly the same
-# [Your existing global variables and all functions remain unchanged]
-# Global variables to store models
-tokenizer = None
-ner_pipeline = None
-pos_pipeline = None
-intent_classifier = None
-semantic_model = None
-stt_model = None  # Speech-to-text model
-models_loaded = False
-# Database to store keyword ranking history (in-memory database for this example)
-ranking_history = {}
-# [Keep all your existing functions - load_models, speech_to_text, etc.]
-# I'm not repeating them here to save space, but they should remain exactly the same
-# Updated Gradio interface with AI Snipper styling
 with gr.Blocks(
     css=ai_snipper_css,
     title="🧬 AI Snipper Keyword DNA Analyzer",
@@ -436,9 +1168,11 @@ with gr.Blocks(
     # Custom header with DNA theme
     gr.HTML("""
-    <div class="dna-header">
-        <h1>🧬 Keyword DNA Analyzer</h1>
-        <p class="dna-subtitle">
             Decode the genetic structure of your keywords with AI-powered analysis
         </p>
     </div>
@@ -489,19 +1223,18 @@ with gr.Blocks(
             # Status indicator with custom styling
             status_html = gr.HTML(
-                '<div class="status-message">🚀 Enter a keyword and click "Analyze DNA" to begin</div>'
             )
             # Main analyze button
             analyze_btn = gr.Button(
                 "🧬 Analyze DNA",
-                variant="primary",
-                size="lg"
             )
             # Example buttons with custom styling
             gr.Markdown("### 💡 Try These Examples")
-            with gr.Row(elem_classes="example-buttons"):
                 example_btns = []
                 examples = [
                     "preprocessing",
@@ -533,7 +1266,7 @@ with gr.Blocks(
                 with gr.Tab("💾 Raw Data"):
                     json_output = gr.JSON()
-    # Event handlers remain the same but with updated status messages
     voice_submit_btn.click(
         handle_voice_input,
         inputs=[audio_input],
@@ -542,14 +1275,14 @@ with gr.Blocks(
     # Updated status messages with custom styling
     analyze_btn.click(
-        lambda: '<div class="status-message status-loading">🔄 Loading models and analyzing... This may take a moment.</div>',
         outputs=status_html
     ).then(
         analyze_keyword,
         inputs=[input_text, forecast_months, growth_scenario, include_serp],
         outputs=[token_viz_html, analysis_html, json_output, evolution_chart, serp_html, ranking_chart, input_text]
     ).then(
-        lambda: '<div class="status-message status-success">✅ Analysis complete! Check the results above.</div>',
         outputs=status_html
     )
@@ -564,21 +1297,17 @@ with gr.Blocks(
             inputs=[btn],
             outputs=[input_text]
         ).then(
-            lambda: '<div class="status-message status-loading">🔄 Loading models and analyzing... This may take a moment.</div>',
             outputs=status_html
         ).then(
             analyze_keyword,
             inputs=[input_text, forecast_months, growth_scenario, include_serp],
             outputs=[token_viz_html, analysis_html, json_output, evolution_chart, serp_html, ranking_chart, input_text]
         ).then(
-            lambda: '<div class="status-message status-success">✅ Analysis complete! Check the results above.</div>',
             outputs=status_html
         )
 # Launch configuration
 if __name__ == "__main__":
-    demo.launch(
-        share=True,
-        show_error=True,
-        debug=True
-    )

 import gradio as gr
 import numpy as np
 import pandas as pd
     color: var(--text-primary) !important;
 }
 /* Tabs */
 .gr-tab-nav {
     background: var(--gradient-card) !important;
     box-shadow: 0 2px 4px rgba(6, 182, 212, 0.3) !important;
 }
+/* Other elements */
+.gr-audio, .gr-file-upload {
     background: var(--gradient-card) !important;
     border: 1px solid var(--border-primary) !important;
     border-radius: 12px !important;
 }
+.gr-slider input[type="range"]::-webkit-slider-thumb {
     background: var(--gradient-button) !important;
+    border: none !important;
+    border-radius: 50% !important;
 }
+.gr-radio input[type="radio"], .gr-checkbox input[type="checkbox"] {
+    accent-color: var(--ai-cyan) !important;
 }
+/* Footer hiding */
+footer {
+    visibility: hidden !important;
 }
+"""
+# Model handles shared across the app. Everything starts unloaded; load_models()
+# fills these in on first use and then flips models_loaded to True.
+tokenizer = None
+ner_pipeline = None
+pos_pipeline = None
+intent_classifier = None
+semantic_model = None
+stt_model = None  # Speech-to-text model; load_models() stores a (processor, model) tuple here
+models_loaded = False
+# Keyword ranking history, kept in memory only for this example: maps a keyword
+# to a list of {"timestamp", "results"} snapshots written by update_ranking_history().
+# In a real app, you would use a proper database.
+ranking_history = {}
+def load_models(progress=gr.Progress()):
+    """Load every model the analyzer uses, once, on the first call.
+
+    Populates the module-level handles (tokenizer, ner_pipeline, pos_pipeline,
+    intent_classifier, stt_model, semantic_model) and sets models_loaded.
+
+    Args:
+        progress: Gradio progress tracker; called with a fraction and a
+            description before each loading stage.
+
+    Returns:
+        True when the models are (or already were) loaded. If a required
+        model fails to load, the string "Error: <message>" is returned
+        instead of raising.
+
+    NOTE(review): the error string is truthy, so callers must compare the
+    result against True rather than rely on truthiness - confirm call sites.
+    """
+    global tokenizer, ner_pipeline, pos_pipeline, intent_classifier, semantic_model, stt_model, models_loaded
+    # Fast path: a previous call already finished loading.
+    if models_loaded:
+        return True
+    try:
+        progress(0.1, desc="Loading models...")
+        # Use smaller models and load them sequentially to reduce memory pressure
+        from transformers import AutoTokenizer, pipeline
+        progress(0.2, desc="Loading tokenizer...")
+        tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
+        progress(0.3, desc="Loading NER model...")
+        ner_pipeline = pipeline("ner", model="dslim/bert-base-NER")
+        progress(0.4, desc="Loading POS model...")
+        # Use smaller POS model
+        from transformers import AutoModelForTokenClassification, BertTokenizerFast
+        pos_model = AutoModelForTokenClassification.from_pretrained("vblagoje/bert-english-uncased-finetuned-pos")
+        pos_tokenizer = BertTokenizerFast.from_pretrained("vblagoje/bert-english-uncased-finetuned-pos")
+        pos_pipeline = pipeline("token-classification", model=pos_model, tokenizer=pos_tokenizer)
+        progress(0.6, desc="Loading intent classifier...")
+        # Use a smaller model for zero-shot classification
+        # NOTE(review): `torch` is not imported in this function - presumably a
+        # module-level import; confirm, otherwise this raises NameError.
+        intent_classifier = pipeline(
+            "zero-shot-classification",
+            model="typeform/distilbert-base-uncased-mnli",  # Smaller than BART
+            device=0 if torch.cuda.is_available() else -1   # Use GPU if available
+        )
+        progress(0.7, desc="Loading speech-to-text model...")
+        # Speech-to-text is optional: a failure here only disables voice input.
+        try:
+            # Load automatic speech recognition model
+            from transformers import WhisperProcessor, WhisperForConditionalGeneration
+            processor = WhisperProcessor.from_pretrained("openai/whisper-small.en")
+            stt_model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-small.en")
+            # Keep processor and model together; speech_to_text() unpacks this tuple.
+            stt_model = (processor, stt_model)
+        except Exception as e:
+            print(f"Warning: Could not load speech-to-text model: {str(e)}")
+            stt_model = None  # Set to None so we can check if it's available
+        progress(0.8, desc="Loading semantic model...")
+        # The semantic model is optional as well; failure leaves semantic_model as None.
+        try:
+            from sentence_transformers import SentenceTransformer
+            semantic_model = SentenceTransformer('all-MiniLM-L6-v2')
+        except Exception as e:
+            print(f"Warning: Could not load semantic model: {str(e)}")
+            semantic_model = None  # Set to None so we can check if it's available
+        progress(1.0, desc="Models loaded successfully!")
+        models_loaded = True
+        return True
+    except Exception as e:
+        # Any failure in a required model lands here; models_loaded stays False,
+        # so the next call retries the whole sequence.
+        print(f"Error loading models: {str(e)}")
+        return f"Error: {str(e)}"
+def speech_to_text(audio_path):
+    """Transcribe an audio file to text with the loaded speech-to-text model.
+
+    Args:
+        audio_path: Path of the audio file to transcribe.
+
+    Returns:
+        The transcription string. When the model is not loaded, or when any
+        step fails, a human-readable message is returned instead of raising.
+    """
+    if stt_model is None:
+        return "Speech-to-text model not loaded. Please try text input instead."
+    try:
+        import librosa
+        # One rate for both loading and feature extraction, so the two can never drift apart.
+        target_rate = 16000
+        waveform, _ = librosa.load(audio_path, sr=target_rate)
+        # stt_model is the (processor, model) pair stored by load_models().
+        processor, model = stt_model
+        input_features = processor(waveform, sampling_rate=target_rate, return_tensors="pt").input_features
+        # Generate token ids, then decode them to text.
+        predicted_ids = model.generate(input_features)
+        return processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
+    except Exception as e:
+        # Best-effort by design: the caller shows the returned text to the user.
+        print(f"Error in speech_to_text: {str(e)}")
+        return f"Error processing speech: {str(e)}"
+def handle_voice_input(audio):
+    """Turn a recorded audio input into text, or return a message explaining why not."""
+    if audio is not None:
+        try:
+            # Delegate the actual transcription.
+            return speech_to_text(audio)
+        except Exception as err:
+            print(f"Error in handle_voice_input: {str(err)}")
+            return f"Error: {str(err)}"
+    return "No audio detected. Please try again."
+def simulate_google_serp(keyword, num_results=10):
+    """Build a list of fake search results for a keyword.
+
+    No real search API is called. Each result is a dict with position, title,
+    url, domain, snippet, ctr_estimate and impressions_estimate. Returns an
+    empty list if anything goes wrong.
+    """
+    try:
+        # Seed NumPy's global RNG from the keyword's code points so the same
+        # keyword always produces the same impression estimates.
+        np.random.seed(sum(map(ord, keyword)))
+        domain_pool = (
+            "example.com", "wikipedia.org", "medium.com", "github.com",
+            "stackoverflow.com", "amazon.com", "youtube.com", "reddit.com",
+            "linkedin.com", "twitter.com", "facebook.com", "instagram.com"
+        )
+        def build_result(rank):
+            # Domains are assigned round-robin by rank.
+            site = domain_pool[rank % len(domain_pool)]
+            return {
+                "position": rank,
+                "title": f"{keyword.title()} - {site.split('.')[0].title()} Resource #{rank}",
+                "url": f"https://www.{site}/{keyword.replace(' ', '-')}-resource-{rank}",
+                "domain": site,
+                "snippet": f"This is a simulated SERP result for '{keyword}'. Result #{rank} would provide relevant information about this topic.",
+                # Click-through rate decays geometrically with rank.
+                "ctr_estimate": round(0.3 * (0.85 ** (rank - 1)), 4),
+                "impressions_estimate": np.random.randint(1000, 10000)
+            }
+        return [build_result(rank) for rank in range(1, num_results + 1)]
+    except Exception as err:
+        print(f"Error in simulate_google_serp: {str(err)}")
+        return []
+def update_ranking_history(keyword, serp_results):
+    """Append a timestamped snapshot of the top results to a keyword's history.
+
+    Returns True on success and False if anything goes wrong.
+    """
+    try:
+        recorded_at = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+        # Create the keyword's history list on first sight.
+        snapshots = ranking_history.setdefault(keyword, [])
+        # Only the top 5 results are kept per snapshot.
+        snapshots.append({
+            "timestamp": recorded_at,
+            "results": serp_results[:5]
+        })
+        # Cap each keyword's history at its 10 most recent snapshots.
+        if len(snapshots) > 10:
+            ranking_history[keyword] = snapshots[-10:]
+        return True
+    except Exception as err:
+        print(f"Error in update_ranking_history: {str(err)}")
+        return False
+def get_semantic_similarity(token, comparison_terms):
+    """Rank comparison terms by cosine similarity to a token.
+
+    Returns (term, similarity) pairs sorted from most to least similar. If
+    anything fails, every term is paired with a placeholder score of 0.5.
+    """
+    try:
+        from sklearn.metrics.pairwise import cosine_similarity
+        token_vec = semantic_model.encode([token])[0]
+        term_vecs = semantic_model.encode(comparison_terms)
+        scored = [
+            (comparison_terms[idx], float(cosine_similarity([token_vec], [vec])[0][0]))
+            for idx, vec in enumerate(term_vecs)
+        ]
+        scored.sort(key=lambda pair: pair[1], reverse=True)
+        return scored
+    except Exception as err:
+        print(f"Error in semantic similarity: {str(err)}")
+        # Placeholder scores keep the rest of the analysis running.
+        return [(term, 0.5) for term in comparison_terms]
+def get_token_colors(token_type):
+    """Return the background hex color for a token type (light gray if unknown)."""
+    light_gray = "#E5E5E5"
+    light_orange = "#FFCC80"
+    palette = {
+        "prefix": "#D8BFD8",  # Light purple
+        "suffix": "#AEDAA4",  # Light green
+        "stem": "#A4C2F4",  # Light blue
+        "compound_first": light_orange,
+        "compound_second": light_orange,
+        "word": light_gray,
+    }
+    return palette.get(token_type, light_gray)
+def simulate_historical_data(token):
+    """Produce fake per-era usage levels for a token as (era, value) pairs."""
+    eras = ("1900s", "1950s", "1980s", "2000s", "2010s", "Present")
+    # Long tokens are treated as technical terms with recent growth.
+    if len(token) > 8:
+        return list(zip(eras, [10, 20, 30, 60, 85, 95]))
+    # Tokens with a common prefix are treated as older words.
+    if token.startswith(("un", "re", "de", "pre")):
+        return list(zip(eras, [45, 50, 60, 70, 75, 80]))
+    # Everything else: a rising baseline plus seeded noise. The code-point sum
+    # is used instead of hash() so results are the same across runs.
+    checksum = sum(map(ord, token))
+    base = 50 + checksum % 30
+    np.random.seed(checksum)
+    noise = np.random.normal(0, 5, 6)
+    values = []
+    for step, jitter in enumerate(noise):
+        # Clamp every value into the 5..95 band.
+        values.append(max(5, min(95, base + step * 5 + jitter)))
+    return list(zip(eras, values))
+def generate_origin_data(token):
+    """Pick a fake etymology (era, language, note) for a token, deterministically."""
+    candidates = (
+        ("Ancient", "Latin"),
+        ("Ancient", "Greek"),
+        ("Medieval", "Old English"),
+        ("16th century", "French"),
+        ("18th century", "Germanic"),
+        ("19th century", "Anglo-Saxon"),
+        ("20th century", "Modern English"),
+    )
+    # The code-point sum selects the entry, so a token always maps to the same origin.
+    era, language = candidates[sum(map(ord, token)) % len(candidates)]
+    return {
+        "era": era,
+        "language": language,
+        "note": f"First appeared in {era} texts derived from {language}.",
+    }
+def analyze_token_types(tokens):
+    """Label each token as prefix, suffix, compound_first or word.
+
+    Returns one {"text", "type"} dict per token, with the text lowercased.
+    """
+    prefixes = ("un", "re", "de", "pre", "post", "anti", "pro", "inter", "sub", "super")
+    suffixes = ("ing", "ed", "ly", "ment", "tion", "able", "ible", "ness", "ful", "less")
+    def classify(text):
+        # An affix only counts when more than two characters remain besides it.
+        if any(text.startswith(p) and len(text) > len(p) + 2 for p in prefixes):
+            return "prefix"
+        if any(text.endswith(s) and len(text) > len(s) + 2 for s in suffixes):
+            return "suffix"
+        # Simplified compound check: any remaining long word.
+        if len(text) > 8:
+            return "compound_first"
+        return "word"
+    return [
+        {"text": lowered, "type": classify(lowered)}
+        for lowered in (token.lower() for token in tokens)
+    ]
+def plot_historical_data(historical_data):
+    """Draw a bar chart of historical usage levels.
+
+    Args:
+        historical_data: Sequence of (era, value) pairs, as produced by
+            simulate_historical_data().
+
+    Returns:
+        The `plt` object itself (not a Figure), with the chart drawn on its
+        current figure. On failure, `plt` with a text-only error figure.
+
+    NOTE(review): `plt` is not imported in this function - presumably
+    matplotlib.pyplot at module level; confirm. Each call opens a new figure
+    that is never closed here.
+    """
+    try:
+        # Split the (era, value) pairs into x labels and bar heights.
+        eras = [item[0] for item in historical_data]
+        values = [item[1] for item in historical_data]
+        plt.figure(figsize=(8, 3))
+        plt.bar(eras, values, color='skyblue')
+        plt.title('Historical Usage')
+        plt.xlabel('Era')
+        plt.ylabel('Usage Level')
+        # Fixed 0-100 scale so charts for different tokens are comparable.
+        plt.ylim(0, 100)
+        plt.xticks(rotation=45)
+        plt.tight_layout()
+        return plt
+    except Exception as e:
+        print(f"Error in plot_historical_data: {str(e)}")
+        # Return a simple error plot
+        plt.figure(figsize=(8, 3))
+        plt.text(0.5, 0.5, f"Error creating plot: {str(e)}",
+                 horizontalalignment='center', verticalalignment='center')
+        plt.axis('off')
+        return plt
+def create_evolution_chart(data, forecast_months=6, growth_scenario="Moderate"):
+    """Plot search volume, competition score and intent clarity on one chart.
+
+    Args:
+        data: Sequence of dicts with "month", "searchVolume",
+            "competitionScore" and "intentClarity" keys.
+        forecast_months: Accepted for interface compatibility; not used here.
+        growth_scenario: Label shown in the chart title.
+
+    Returns:
+        A Plotly figure. If the chart cannot be built (for example, empty
+        data or a missing key), a small fallback figure is returned instead.
+    """
+    # Imported before the try block so the fallback branch below can always
+    # reach `go`; previously a failed import left `go` unbound in the handler.
+    import plotly.graph_objects as go
+    try:
+        # Extract the shared axes once instead of once per trace.
+        months = [item["month"] for item in data]
+        volumes = [item["searchVolume"] for item in data]
+        fig = go.Figure()
+        fig.add_trace(
+            go.Scatter(
+                x=months,
+                y=volumes,
+                name="Search Volume",
+                line=dict(color="#8884d8", width=3),
+                mode="lines+markers"
+            )
+        )
+        # The two scores are rescaled so they are visible next to the
+        # (much larger) search volume on the same axis.
+        scale_factor = max(volumes) / 100
+        scaled_metrics = (
+            ("competitionScore", "Competition Score", "#82ca9d", "dot"),
+            ("intentClarity", "Intent Clarity", "#ffc658", "dash"),
+        )
+        for key, label, color, dash in scaled_metrics:
+            fig.add_trace(
+                go.Scatter(
+                    x=months,
+                    y=[item[key] * scale_factor for item in data],
+                    name=label,
+                    line=dict(color=color, width=2, dash=dash),
+                    mode="lines+markers"
+                )
+            )
+        fig.update_layout(
+            title=f"Keyword Evolution Forecast ({growth_scenario} Growth)",
+            xaxis_title="Month",
+            yaxis_title="Value",
+            legend=dict(orientation="h", y=1.1),
+            height=500
+        )
+        return fig
+    except Exception as e:
+        print(f"Error in chart creation: {str(e)}")
+        # Fallback to an even simpler chart
+        fig = go.Figure(data=go.Scatter(x=[1, 2, 3], y=[4, 1, 2]))
+        fig.update_layout(title="Fallback Chart (Error occurred)")
+        return fig
+def create_ranking_history_chart(keyword_history):
+    """Create a chart showing how each domain's position changed over time.
+
+    Args:
+        keyword_history: List of {"timestamp", "results"} snapshots for one
+            keyword, in the shape written by update_ranking_history().
+
+    Returns:
+        A figure with one line per domain. With fewer than two snapshots, or
+        on any error, a figure carrying only an explanatory annotation.
+
+    NOTE(review): `go` is not imported in this function - presumably
+    plotly.graph_objects at module level; confirm, because the except branch
+    below relies on it too.
+    """
+    try:
+        if not keyword_history or len(keyword_history) < 2:
+            # Not enough data for a meaningful chart
+            fig = go.Figure()
+            fig.update_layout(
+                title="Insufficient Ranking Data",
+                annotations=[{
+                    "text": "Need at least 2 data points for ranking history",
+                    "showarrow": False,
+                    "font": {"size": 16},
+                    "xref": "paper",
+                    "yref": "paper",
+                    "x": 0.5,
+                    "y": 0.5
+                }]
+            )
+            return fig
+        # Create a figure
+        fig = go.Figure()
+        # Extract timestamps and convert to datetime objects
+        timestamps = [entry["timestamp"] for entry in keyword_history]
+        dates = [datetime.datetime.strptime(ts, "%Y-%m-%d %H:%M:%S") for ts in timestamps]
+        # Get unique domains from all results
+        all_domains = set()
+        for entry in keyword_history:
+            for result in entry["results"]:
+                all_domains.add(result["domain"])
+        # Colors for different domains
+        # Colors are handed out in set iteration order and wrap around after
+        # ten domains, so two domains can share a color.
+        domain_colors = {}
+        color_palette = [
+            "#1f77b4", "#ff7f0e", "#2ca02c", "#d62728", "#9467bd",
+            "#8c564b", "#e377c2", "#7f7f7f", "#bcbd22", "#17becf"
+        ]
+        for i, domain in enumerate(all_domains):
+            domain_colors[domain] = color_palette[i % len(color_palette)]
+        # Track domains and their positions over time
+        domain_tracking = {domain: {"x": [], "y": [], "text": []} for domain in all_domains}
+        for i, entry in enumerate(keyword_history):
+            for result in entry["results"]:
+                domain = result["domain"]
+                position = result["position"]
+                title = result["title"]
+                # dates[i] is the parsed timestamp of this snapshot.
+                domain_tracking[domain]["x"].append(dates[i])
+                domain_tracking[domain]["y"].append(position)
+                domain_tracking[domain]["text"].append(title)
+        # Add traces for each domain
+        for domain, data in domain_tracking.items():
+            if len(data["x"]) > 0:  # Only add domains that have data
+                fig.add_trace(
+                    go.Scatter(
+                        x=data["x"],
+                        y=data["y"],
+                        mode="lines+markers",
+                        name=domain,
+                        line=dict(color=domain_colors[domain]),
+                        hovertemplate="%{text}<br>Position: %{y}<br>Date: %{x}<extra></extra>",
+                        text=data["text"],
+                        marker=dict(size=8)
+                    )
+                )
+        # Update layout
+        fig.update_layout(
+            title="Keyword Ranking History",
+            xaxis_title="Date",
+            yaxis_title="Position",
+            yaxis=dict(autorange="reversed"),  # Invert y-axis so position 1 is on top
+            hovermode="closest",
+            height=500
+        )
+        return fig
+    except Exception as e:
+        print(f"Error in create_ranking_history_chart: {str(e)}")
+        # Return fallback chart
+        fig = go.Figure()
+        fig.update_layout(
+            title="Error Creating Ranking Chart",
+            annotations=[{
+                "text": f"Error: {str(e)}",
+                "showarrow": False,
+                "font": {"size": 14},
+                "xref": "paper",
+                "yref": "paper",
+                "x": 0.5,
+                "y": 0.5
+            }]
+        )
+        return fig
+def generate_serp_html(keyword, serp_results):
+    """Render SERP results as a self-contained HTML card.
+
+    Args:
+        keyword: The searched keyword, shown in the heading. It comes from
+            user input, so it is HTML-escaped before being inserted.
+        serp_results: List of result dicts with "position", "title", "url",
+            "snippet", "ctr_estimate" and "impressions_estimate" keys.
+
+    Returns:
+        An HTML string, or a short placeholder when there are no results.
+    """
+    from html import escape
+    if not serp_results:
+        return "<div>No SERP results available</div>"
+    def as_text(value):
+        # Every value lands in a text node, so only &, < and > need escaping;
+        # quotes are left alone to keep ordinary output unchanged.
+        return escape(str(value), quote=False)
+    parts = [f"""
+    <div style="font-family: Arial, sans-serif; padding: 20px; border: 1px solid #ddd; border-radius: 8px;">
+        <h2 style="margin-top: 0;">SERP Results for "{as_text(keyword)}"</h2>
+        <div style="background-color: #f5f5f5; padding: 10px; border-radius: 4px; margin-bottom: 20px;">
+            <div style="color: #666; font-size: 12px;">This is a simulated SERP. In a real application, this would use the Google API.</div>
+        </div>
+        <div class="serp-results" style="display: flex; flex-direction: column; gap: 16px;">
+    """]
+    for result in serp_results:
+        position = result["position"]
+        # Titles, URLs and snippets are derived from the keyword, so they are escaped too.
+        title = as_text(result["title"])
+        url = as_text(result["url"])
+        snippet = as_text(result["snippet"])
+        ctr = result["ctr_estimate"]
+        impressions = result["impressions_estimate"]
+        parts.append(f"""
+        <div class="serp-result" style="padding: 15px; border: 1px solid #e2e8f0; border-radius: 6px; position: relative;">
+            <div style="position: absolute; top: -10px; left: -10px; background-color: #4299e1; color: white; width: 24px; height: 24px; border-radius: 50%; display: flex; align-items: center; justify-content: center; font-size: 12px;">
+                {position}
+            </div>
+            <div style="margin-bottom: 5px;">
+                <a href="#" style="font-size: 18px; color: #1a73e8; text-decoration: none; font-weight: 500;">{title}</a>
+            </div>
+            <div style="margin-bottom: 8px; color: #006621; font-size: 14px;">{url}</div>
+            <div style="color: #4d5156; font-size: 14px;">{snippet}</div>
+            <div style="display: flex; margin-top: 10px; font-size: 12px; color: #666;">
+                <div style="margin-right: 15px;"><span style="font-weight: 500;">CTR:</span> {ctr:.2%}</div>
+                <div><span style="font-weight: 500;">Est. Impressions:</span> {impressions:,}</div>
+            </div>
+        </div>
+        """)
+    parts.append("""
+        </div>
+    </div>
+    """)
+    return "".join(parts)
+def generate_token_visualization_html(token_analysis, full_analysis):
+    """Generate HTML for token visualization.
+    Builds a single HTML fragment with three sections: a "Human View" row
+    with one chip per entry of ``token_analysis``, a "Machine View" row with
+    one chip per entry of ``full_analysis`` (background chosen by
+    ``get_token_colors``), and a stats grid showing word count, token count
+    and the tokens-per-word ratio.
+    Args:
+        token_analysis: Sequence of dicts; each entry's ``"text"`` is shown.
+        full_analysis: Sequence of dicts; each entry's ``"token"`` and
+            ``"type"`` are shown, and ``"type"`` selects the chip colour.
+    Returns:
+        The HTML fragment as a string.
+    """
+    # Imported locally so the block does not depend on the file's import list.
+    from html import escape
+    # Collect fragments and join once instead of growing a string with +=.
+    parts = ["""
+    <div style="font-family: Arial, sans-serif; padding: 20px; border: 1px solid #ddd; border-radius: 8px;">
+        <h2 style="margin-top: 0;">Token Visualization</h2>
+        <div style="margin-bottom: 20px; padding: 15px; background-color: #f8f9fa; border-radius: 6px;">
+            <div style="margin-bottom: 8px; font-weight: bold; color: #4a5568;">Human View:</div>
+            <div style="display: flex; flex-wrap: wrap; gap: 8px;">
+    """]
+    # Add human view tokens. The text originates from the user's keyword, so
+    # it is escaped before being embedded in markup.
+    for token in token_analysis:
+        parts.append(f"""
+        <div style="padding: 6px 12px; background-color: white; border: 1px solid #cbd5e0; border-radius: 4px;">
+            {escape(str(token['text']))}
+        </div>
+        """)
+    parts.append("""
+            </div>
+        </div>
+        <div style="text-align: center; margin: 15px 0;">
+            <span style="font-size: 20px;">↓</span>
+        </div>
+        <div style="padding: 15px; background-color: #f0fff4; border-radius: 6px;">
+            <div style="margin-bottom: 8px; font-weight: bold; color: #2f855a;">Machine View:</div>
+            <div style="display: flex; flex-wrap: wrap; gap: 8px;">
+    """)
+    # Add machine view tokens
+    for token in full_analysis:
+        bg_color = get_token_colors(token["type"])
+        parts.append(f"""
+        <div style="padding: 6px 12px; background-color: {bg_color}; border: 1px solid #a0aec0; border-radius: 4px; font-family: monospace;">
+            {escape(str(token['token']))}
+            <span style="font-size: 10px; opacity: 0.7; display: block;">{escape(str(token['type']))}</span>
+        </div>
+        """)
+    parts.append("""
+            </div>
+        </div>
+        <div style="margin-top: 20px; display: grid; grid-template-columns: repeat(3, 1fr); gap: 10px; text-align: center;">
+    """)
+    # Add stats; max(1, ...) guards the ratio against an empty word list.
+    word_count = len(token_analysis)
+    token_count = len(full_analysis)
+    ratio = round(token_count / max(1, word_count), 2)
+    parts.append(f"""
+        <div style="background-color: #ebf8ff; padding: 10px; border-radius: 6px;">
+            <div style="font-size: 24px; font-weight: bold; color: #3182ce;">{word_count}</div>
+            <div style="font-size: 14px; color: #4299e1;">Words</div>
+        </div>
+        <div style="background-color: #f0fff4; padding: 10px; border-radius: 6px;">
+            <div style="font-size: 24px; font-weight: bold; color: #38a169;">{token_count}</div>
+            <div style="font-size: 14px; color: #48bb78;">Tokens</div>
+        </div>
+        <div style="background-color: #faf5ff; padding: 10px; border-radius: 6px;">
+            <div style="font-size: 24px; font-weight: bold; color: #805ad5;">{ratio}</div>
+            <div style="font-size: 14px; color: #9f7aea;">Tokens per Word</div>
+        </div>
+    """)
+    parts.append("""
+        </div>
+    </div>
+    """)
+    return "".join(parts)
+def generate_full_analysis_html(keyword, token_analysis, intent_analysis, evolution_potential, trends):
+    """Generate HTML for full keyword analysis.
+    The fragment contains an "Intent Gene" card, an "Evolution Potential"
+    gauge, a "Future Mutations" list and one detail card per token with its
+    tags, importance bar, origin note and a small historical bar chart.
+    Args:
+        keyword: The analysed keyword, shown in the heading.
+        token_analysis: Sequence of dicts read via the keys ``"token"``,
+            ``"posTag"``, ``"entityType"``, ``"importance"``, ``"origin"``
+            (itself a dict with ``"era"``, ``"language"``, ``"note"``) and
+            ``"historicalData"`` (an iterable of ``(period, value)`` pairs).
+        intent_analysis: Dict read via ``"type"`` and ``"strength"``; the
+            strength is used directly as a CSS width percentage.
+        evolution_potential: Number shown in the gauge and used as the
+            stroke-dasharray length.
+        trends: Iterable of trend labels.
+    Returns:
+        The HTML fragment as a string.
+    """
+    # Imported locally so the block does not depend on the file's import list.
+    from html import escape
+    # All free text below can derive from user input, so it is escaped before
+    # being placed in markup. Fragments are joined once at the end.
+    parts = [f"""
+    <div style="font-family: Arial, sans-serif; padding: 20px; border: 1px solid #ddd; border-radius: 8px;">
+        <h2 style="margin-top: 0;">Keyword DNA Analysis for: {escape(str(keyword))}</h2>
+        <div style="display: grid; grid-template-columns: 1fr 1fr; gap: 15px; margin-bottom: 20px;">
+            <div style="padding: 15px; border: 1px solid #e2e8f0; border-radius: 6px;">
+                <h3 style="margin-top: 0; font-size: 16px;">Intent Gene</h3>
+                <div style="display: flex; justify-content: space-between; margin-bottom: 10px;">
+                    <span>Type:</span>
+                    <span>{escape(str(intent_analysis['type']))}</span>
+                </div>
+                <div style="display: flex; justify-content: space-between; align-items: center;">
+                    <span>Strength:</span>
+                    <div style="width: 120px; height: 8px; background-color: #edf2f7; border-radius: 4px; overflow: hidden;">
+                        <div style="height: 100%; background-color: #48bb78; width: {intent_analysis['strength']}%;"></div>
+                    </div>
+                </div>
+            </div>
+            <div style="padding: 15px; border: 1px solid #e2e8f0; border-radius: 6px;">
+                <h3 style="margin-top: 0; font-size: 16px;">Evolution Potential</h3>
+                <div style="display: flex; justify-content: center; align-items: center; height: 100px;">
+                    <div style="position: relative; width: 100px; height: 100px;">
+                        <div style="position: absolute; inset: 0; display: flex; align-items: center; justify-content: center;">
+                            <span style="font-size: 24px; font-weight: bold;">{evolution_potential}</span>
+                        </div>
+                        <svg width="100" height="100" viewBox="0 0 36 36">
+                            <path
+                              d="M18 2.0845 a 15.9155 15.9155 0 0 1 0 31.831 a 15.9155 15.9155 0 0 1 0 -31.831"
+                              fill="none"
+                              stroke="#4CAF50"
+                              stroke-width="3"
+                              stroke-dasharray="{evolution_potential}, 100"
+                            />
+                        </svg>
+                    </div>
+                </div>
+            </div>
+        </div>
+        <div style="padding: 15px; border: 1px solid #e2e8f0; border-radius: 6px; margin-bottom: 20px;">
+            <h3 style="margin-top: 0; font-size: 16px;">Future Mutations</h3>
+            <div style="display: flex; flex-direction: column; gap: 8px;">
+    """]
+    # Add trends
+    for trend in trends:
+        parts.append(f"""
+        <div style="display: flex; align-items: center; gap: 8px;">
+            <span style="color: #48bb78;">↗</span>
+            <span>{escape(str(trend))}</span>
+        </div>
+        """)
+    parts.append("""
+            </div>
+        </div>
+        <h3 style="margin-bottom: 10px;">Token Details &amp; Historical Analysis</h3>
+    """)
+    # Add token details
+    for token in token_analysis:
+        origin = token['origin']
+        parts.append(f"""
+        <div style="padding: 15px; border: 1px solid #e2e8f0; border-radius: 6px; margin-bottom: 15px;">
+            <div style="display: flex; justify-content: space-between; align-items: center; margin-bottom: 10px;">
+                <div style="display: flex; align-items: center; gap: 8px;">
+                    <span style="font-size: 18px; font-weight: 500;">{escape(str(token['token']))}</span>
+                    <span style="padding: 2px 8px; background-color: #edf2f7; border-radius: 4px; font-size: 12px;">{escape(str(token['posTag']))}</span>
+        """)
+        # The entity badge is only shown when an entity type was found.
+        if token['entityType']:
+            parts.append(f"""
+            <span style="padding: 2px 8px; background-color: #ebf8ff; color: #3182ce; border-radius: 4px; font-size: 12px; display: flex; align-items: center;">
+                ⓘ {escape(str(token['entityType']))}
+            </span>
+            """)
+        parts.append(f"""
+                </div>
+                <div style="display: flex; align-items: center; gap: 4px;">
+                    <span style="font-size: 12px; color: #718096;">Importance:</span>
+                    <div style="width: 64px; height: 8px; background-color: #edf2f7; border-radius: 4px; overflow: hidden;">
+                        <div style="height: 100%; background-color: #4299e1; width: {token['importance']}%;"></div>
+                    </div>
+                </div>
+            </div>
+            <div style="margin-top: 15px;">
+                <div style="font-size: 12px; color: #718096; margin-bottom: 4px;">Historical Relevance:</div>
+                <div style="border: 1px solid #e2e8f0; border-radius: 4px; padding: 10px; background-color: #f7fafc;">
+                    <div style="font-size: 12px; margin-bottom: 8px;">
+                        <span style="font-weight: 500;">Origin: </span>
+                        <span>{escape(str(origin['era']))}, </span>
+                        <span style="font-style: italic;">{escape(str(origin['language']))}</span>
+                    </div>
+                    <div style="font-size: 12px; margin-bottom: 12px;">{escape(str(origin['note']))}</div>
+                    <div style="display: flex; align-items: flex-end; height: 50px; gap: 4px; margin-top: 8px;">
+        """)
+        # Add historical data bars. The bar's position drives its opacity;
+        # enumerate() gives the true position even when two entries share the
+        # same (period, value), which a list.index() lookup would conflate.
+        for position, (period, value) in enumerate(token['historicalData']):
+            # Rounded to avoid float noise in the CSS; alpha is capped at 1.
+            opacity = min(1.0, round(0.3 + (position * 0.1), 2))
+            parts.append(f"""
+            <div style="display: flex; flex-direction: column; align-items: center; flex: 1;">
+                <div style="width: 100%; background-color: rgba(66, 153, 225, {opacity}); border-radius: 2px 2px 0 0; height: {max(4, value)}%;"></div>
+                <div style="font-size: 9px; margin-top: 4px; color: #718096; transform: rotate(45deg); transform-origin: top left; white-space: nowrap;">
+                    {escape(str(period))}
+                </div>
+            </div>
+            """)
+        parts.append("""
+                    </div>
+                </div>
+            </div>
+        </div>
+        """)
+    parts.append("""
+    </div>
+    """)
+    return "".join(parts)
+def _find_entity_label(results, text, default=None):
+    """Return the "entity" of the first result whose lower-cased "word" equals *text*, else *default*."""
+    return next((item["entity"] for item in results if item["word"].lower() == text), default)
+def _build_evolution_forecast(keyword, forecast_months, growth_scenario):
+    """Simulate per-month search volume, competition and intent-clarity figures for *keyword*.
+    The numbers are synthetic: they are derived from the keyword's length and
+    character codes plus a pseudo-random factor seeded per month, so the same
+    inputs always produce the same series.
+    """
+    keyword_sum = sum(ord(c) for c in keyword)
+    # (base rate, per-step increment) per scenario; any other value is
+    # treated as "Moderate".
+    base_rate, step = {
+        "Conservative": (1.05, 0.02),
+        "Aggressive": (1.15, 0.05),
+    }.get(growth_scenario, (1.1, 0.03))
+    growth_factor = base_rate + (step * (keyword_sum % 5))
+    # NOTE(review): only twelve month names exist, so a forecast_months value
+    # above 12 still yields 12 points - confirm the UI's upper bound.
+    months = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"][:int(forecast_months)]
+    current_volume = 1000 + (len(keyword) * 100)
+    evolution_data = []
+    for month_index, month in enumerate(months):
+        # A private RandomState gives the same draw as seeding the global
+        # NumPy generator, without resetting that process-wide state.
+        seed = sum(ord(c) for c in month + keyword)
+        random_factor = 0.9 + (0.2 * np.random.RandomState(seed).random_sample())
+        current_volume *= growth_factor * random_factor
+        evolution_data.append({
+            "month": month,
+            "searchVolume": int(current_volume),
+            "competitionScore": min(95, 45 + (month_index * 3) + (keyword_sum % 10)),
+            "intentClarity": min(95, 80 + (month_index * 2) + (keyword_sum % 5))
+        })
+    return evolution_data
+def analyze_keyword(keyword, forecast_months=6, growth_scenario="Moderate", get_serp=False, progress=gr.Progress()):
+    """Main function to analyze a keyword.
+    Tokenises the keyword on whitespace, enriches every token (POS tag,
+    entity type, simulated history, origin, importance, related terms),
+    classifies search intent, simulates a forecast and, when requested,
+    simulated SERP data, then renders the HTML/JSON/chart outputs.
+    Args:
+        keyword: Text to analyse.
+        forecast_months: Number of forecast points (converted with ``int``).
+        growth_scenario: "Conservative", "Moderate" or "Aggressive"; any
+            other value is treated as "Moderate".
+        get_serp: When true, also produce SERP results, the SERP HTML and a
+            ranking-history chart.
+        progress: Callable invoked as ``progress(fraction, desc=...)``.
+    Returns:
+        A 7-tuple ``(token_viz_html, analysis_html, json_results,
+        evolution_chart, serp_html, ranking_chart, keyword)``. On empty input
+        or failure the first two items carry a message, the next four are
+        ``None`` and the last is the keyword as received, so an output bound
+        to the input field keeps what the user typed.
+    """
+    # Imported locally so the block does not depend on the file's import list.
+    from html import escape
+    if not keyword or not keyword.strip():
+        prompt = "<div>Please enter a keyword to analyze</div>"
+        return prompt, prompt, None, None, None, None, keyword
+    progress(0.1, desc="Starting analysis...")
+    # Load models if not already loaded
+    model_status = load_models(progress)
+    if isinstance(model_status, str) and model_status.startswith("Error"):
+        failure = f"<div style='color:red;'>{escape(model_status)}</div>"
+        return failure, failure, None, None, None, None, keyword
+    try:
+        # Basic tokenization - just split on spaces for simplicity
+        words = keyword.strip().lower().split()
+        progress(0.2, desc="Analyzing tokens...")
+        # Get token types
+        token_analysis = analyze_token_types(words)
+        progress(0.3, desc="Running NER...")
+        # Get NER tags - best effort: a failure degrades to "no entities"
+        try:
+            ner_results = ner_pipeline(keyword)
+        except Exception as e:
+            print(f"NER error: {str(e)}")
+            ner_results = []
+        progress(0.4, desc="Running POS tagging...")
+        # Get POS tags - best effort: a failure degrades to the default tag
+        try:
+            pos_results = pos_pipeline(keyword)
+        except Exception as e:
+            print(f"POS error: {str(e)}")
+            pos_results = []
+        # Candidate phrases that do not depend on the token are built once.
+        synonym_candidates = ["similar", "equivalent", "comparable", "like", "related", "alternative"]
+        domain_terms = ["software", "marketing", "business", "science", "education", "technology"]
+        # Process and organize results
+        full_token_analysis = []
+        for token in token_analysis:
+            text = token["text"]
+            # POS tag defaults to "NOUN"; entity type defaults to None
+            pos_tag = _find_entity_label(pos_results, text, "NOUN")
+            entity_type = _find_entity_label(ner_results, text)
+            # Generate historical data
+            historical_data = simulate_historical_data(text)
+            # Generate origin data
+            origin = generate_origin_data(text)
+            # Calculate importance (simplified algorithm), capped at 95
+            importance = min(95, 60 + (len(text) * 2))
+            # Placeholder related terms, used when no semantic model is
+            # available or the similarity lookup fails.
+            related_terms = [f"{text}-related-1", f"{text}-related-2"]
+            if semantic_model is not None:
+                try:
+                    prefix_related = [f"about {text}", f"what is {text}", f"how to {text}"]
+                    comparison_terms = prefix_related + synonym_candidates + domain_terms
+                    # Get similarities and keep the first three terms returned
+                    similarities = get_semantic_similarity(text, comparison_terms)
+                    related_terms = [term for term, score in similarities[:3]]
+                except Exception as e:
+                    print(f"Error generating semantic related terms: {str(e)}")
+            full_token_analysis.append({
+                "token": text,
+                "type": token["type"],
+                "posTag": pos_tag,
+                "entityType": entity_type,
+                "importance": importance,
+                "historicalData": historical_data,
+                "origin": origin,
+                "relatedTerms": related_terms
+            })
+        progress(0.5, desc="Analyzing intent...")
+        # Intent analysis - best effort with a fixed fallback
+        try:
+            intent_result = intent_classifier(
+                keyword,
+                candidate_labels=["informational", "navigational", "transactional"]
+            )
+            top_label = intent_result["labels"][0]
+            intent_analysis = {
+                "type": top_label.capitalize(),
+                "strength": round(intent_result["scores"][0] * 100),
+                "mutations": [
+                    f"{top_label}-variation-1",
+                    f"{top_label}-variation-2"
+                ]
+            }
+        except Exception as e:
+            print(f"Intent classification error: {str(e)}")
+            intent_analysis = {
+                "type": "Informational",  # Default fallback
+                "strength": 70,
+                "mutations": ["fallback-variation-1", "fallback-variation-2"]
+            }
+        # Evolution potential (simplified calculation)
+        evolution_potential = min(95, 65 + (len(keyword) % 30))
+        # Predicted trends (simplified)
+        trends = [
+            "Voice search adaptation",
+            "Visual search integration"
+        ]
+        # Simulated, keyword-specific evolution data
+        evolution_data = _build_evolution_forecast(keyword, forecast_months, growth_scenario)
+        progress(0.6, desc="Creating visualizations...")
+        # Create interactive evolution chart
+        evolution_chart = create_evolution_chart(evolution_data, forecast_months, growth_scenario)
+        # SERP results and ranking history (only when requested)
+        serp_results = None
+        ranking_chart = None
+        serp_html = None
+        if get_serp:
+            progress(0.7, desc="Fetching SERP data...")
+            # Get SERP results
+            serp_results = simulate_google_serp(keyword)
+            # Update ranking history
+            update_ranking_history(keyword, serp_results)
+            progress(0.8, desc="Creating ranking charts...")
+            # Create ranking history chart when history exists for this keyword
+            if keyword in ranking_history and len(ranking_history[keyword]) > 0:
+                ranking_chart = create_ranking_history_chart(ranking_history[keyword])
+            # Generate SERP HTML
+            serp_html = generate_serp_html(keyword, serp_results)
+        # Generate HTML for token visualization
+        token_viz_html = generate_token_visualization_html(token_analysis, full_token_analysis)
+        # Generate HTML for full analysis
+        analysis_html = generate_full_analysis_html(
+            keyword,
+            full_token_analysis,
+            intent_analysis,
+            evolution_potential,
+            trends
+        )
+        # Generate JSON results
+        json_results = {
+            "keyword": keyword,
+            "tokenAnalysis": full_token_analysis,
+            "intentAnalysis": intent_analysis,
+            "evolutionPotential": evolution_potential,
+            "predictedTrends": trends,
+            "forecast": {
+                "months": forecast_months,
+                "scenario": growth_scenario,
+                "data": evolution_data
+            },
+            "serpResults": serp_results
+        }
+        progress(1.0, desc="Analysis complete!")
+        return token_viz_html, analysis_html, json_results, evolution_chart, serp_html, ranking_chart, keyword
+    except Exception as e:
+        # Top-level boundary for the UI callback: report the failure in the
+        # output panels instead of raising into the event handler.
+        error_message = f"<div style='color:red;padding:20px;'>Error analyzing keyword: {escape(str(e))}</div>"
+        print(f"Error in analyze_keyword: {str(e)}")
+        return error_message, error_message, None, None, None, None, keyword
+# Create the Gradio interface with AI Snipper styling
 with gr.Blocks(
     css=ai_snipper_css,
     title="🧬 AI Snipper Keyword DNA Analyzer",
     # Custom header with DNA theme
     gr.HTML("""
+    <div style="text-align: center; padding: 2rem 0; margin-bottom: 2rem;">
+        <h1 style="font-size: 3rem; font-weight: 800; margin-bottom: 1rem; background: linear-gradient(135deg, #06b6d4, #3b82f6, #8b5cf6); -webkit-background-clip: text; -webkit-text-fill-color: transparent; background-clip: text;">
+            🧬 Keyword DNA Analyzer
+        </h1>
+        <p style="font-size: 1.2rem; color: #94a3b8; margin-top: 1rem; font-weight: 400;">
             Decode the genetic structure of your keywords with AI-powered analysis
         </p>
     </div>
             # Status indicator with custom styling
             status_html = gr.HTML(
+                '<div style="text-align: center; padding: 1rem; border-radius: 8px; margin: 1rem 0; font-weight: 500; background: rgba(6, 182, 212, 0.1); border: 1px solid #06b6d4; color: #06b6d4;">🚀 Enter a keyword and click "Analyze DNA" to begin</div>'
             )
             # Main analyze button
             analyze_btn = gr.Button(
                 "🧬 Analyze DNA",
+                variant="primary"
             )
             # Example buttons with custom styling
             gr.Markdown("### 💡 Try These Examples")
+            with gr.Row():
                 example_btns = []
                 examples = [
                     "preprocessing",
                 with gr.Tab("💾 Raw Data"):
                     json_output = gr.JSON()
+    # Event handlers
     voice_submit_btn.click(
         handle_voice_input,
         inputs=[audio_input],
     # Updated status messages with custom styling
     analyze_btn.click(
+        lambda: '<div style="text-align: center; padding: 1rem; border-radius: 8px; margin: 1rem 0; font-weight: 500; background: rgba(6, 182, 212, 0.1); border: 1px solid #06b6d4; color: #06b6d4;">🔄 Loading models and analyzing... This may take a moment.</div>',
         outputs=status_html
     ).then(
         analyze_keyword,
         inputs=[input_text, forecast_months, growth_scenario, include_serp],
         outputs=[token_viz_html, analysis_html, json_output, evolution_chart, serp_html, ranking_chart, input_text]
     ).then(
+        lambda: '<div style="text-align: center; padding: 1rem; border-radius: 8px; margin: 1rem 0; font-weight: 500; background: rgba(20, 184, 166, 0.1); border: 1px solid #14b8a6; color: #14b8a6;">✅ Analysis complete! Check the results above.</div>',
         outputs=status_html
     )
             inputs=[btn],
             outputs=[input_text]
         ).then(
+            lambda: '<div style="text-align: center; padding: 1rem; border-radius: 8px; margin: 1rem 0; font-weight: 500; background: rgba(6, 182, 212, 0.1); border: 1px solid #06b6d4; color: #06b6d4;">🔄 Loading models and analyzing... This may take a moment.</div>',
             outputs=status_html
         ).then(
             analyze_keyword,
             inputs=[input_text, forecast_months, growth_scenario, include_serp],
             outputs=[token_viz_html, analysis_html, json_output, evolution_chart, serp_html, ranking_chart, input_text]
         ).then(
+            lambda: '<div style="text-align: center; padding: 1rem; border-radius: 8px; margin: 1rem 0; font-weight: 500; background: rgba(20, 184, 166, 0.1); border: 1px solid #14b8a6; color: #14b8a6;">✅ Analysis complete! Check the results above.</div>',
             outputs=status_html
         )
 # Launch configuration
 if __name__ == "__main__":
+    demo.launch()