import gradio as gr
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer
import pandas as pd
import re

# Load the knowledge base
encoded_df = pd.read_csv('encoded_df.csv').drop(columns=['Unnamed: 0'])
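# Assumed layout of encoded_df.csv (not shown here): the sentence-embedding columns
# come first, followed by 7 engagement-metric columns such as likesCount, commentCount,
# and shareCount, which is why the code below slices with iloc[:, :-7].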

# Initialize the Sentence Transformer model
model = SentenceTransformer('all-MiniLM-L6-v2')

# Function to preprocess text
def preprocess_text(text):
    text = text.lower()  # Lowercase
    text = re.sub(r'[^a-zA-Z\s]', '', text)  # Keep only letters and whitespace (drops digits and punctuation)
    return text
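
# Quick sanity check with a made-up input:
#   preprocess_text("5 Tips to Grow FAST!")  ->  " tips to grow fast"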

def generate_graphs(new_story, metric):
    # Preprocess the new story
    new_story = preprocess_text(new_story)

    # Encode the new story
    new_story_vector = model.encode([new_story])[0]

    # Calculate similarity with knowledge base stories
    knowledge_base_vectors = encoded_df.iloc[:, :-7].values  # Drop the 7 metric columns, keep only the embeddings
    similarities = cosine_similarity([new_story_vector], knowledge_base_vectors)[0]
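    # `similarities` is a 1-D array with one cosine score (in [-1, 1]) per knowledge-base story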

    # Filter indices with similarity > 0.85
    similar_indexes = np.where(similarities > 0.85)[0]
    print('Similar indexes:', similar_indexes)

    if len(similar_indexes) == 0:
        return None, None

    # Order matches from most to least similar so the bar chart matches its axis label
    similar_indexes = similar_indexes[np.argsort(similarities[similar_indexes])[::-1]]

    # Get the metric values for stories with similarity > 0.85
    metric_distribution = encoded_df.iloc[similar_indexes][metric].values
    story_labels = [f"Story {i+1}" for i in similar_indexes]

    # Plot the density of embedding values for the new story against each similar story
    sim_dist_plot = plt.figure(figsize=(10, 6))
    sns.kdeplot(new_story_vector, fill=False, label="New Story", color='blue', linewidth=2)
    for idx in similar_indexes:
        similar_vector = encoded_df.iloc[idx, :-7].values  # Embedding of a matched knowledge-base story
        sns.kdeplot(similar_vector, fill=False, label=f"Story {idx+1}", alpha=0.5)
    plt.title(f"Similarity Distribution: New Story vs Similar Stories ({metric})", fontsize=14)
    plt.xlabel("Vector Values", fontsize=12)
    plt.ylabel("Density", fontsize=12)
    plt.legend(title="Stories")
    plt.tight_layout()

    # Create a bar graph for the metric distribution
    metric_dist_plot = plt.figure(figsize=(10, 6))
    sns.barplot(x=story_labels, y=metric_distribution, palette="viridis")
    plt.title(f"{metric} Distribution for Similar Stories", fontsize=14)
    plt.xlabel("Story(most similar to least)", fontsize=12)
    plt.ylabel(metric, fontsize=12)
    plt.xticks(rotation=90)
    plt.tight_layout()

    return sim_dist_plot, metric_dist_plot
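
# Example call (hypothetical input text):
#   sim_fig, metric_fig = generate_graphs("We just launched our new product...", "likesCount")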


# Gradio interface for a specific metric
def gradio_interface(new_story, metric):
    sim_dist_plot, metric_dist_plot = generate_graphs(new_story, metric)
    if sim_dist_plot is None:
        return None, None
    return sim_dist_plot, metric_dist_plot


# Create the Gradio interface with tabs
def create_tab(metric):
    return gr.Interface(
        fn=lambda new_story: gradio_interface(new_story, metric),
        inputs=gr.Textbox(label="Enter a story", lines=10, placeholder="Enter the story here..."),
        outputs=[gr.Plot(label="Similarity Distribution"), gr.Plot(label=f"{metric} Distribution")],
        title=f"Story Similarity and {metric} Analysis",
        description=f"Enter a new story to compare with the knowledge base. View similarity distributions and {metric} of stories with similarity > 0.85."
    )

likes_tab = create_tab("likesCount")
comments_tab = create_tab("commentCount")
shares_tab = create_tab("shareCount")

# Combine tabs into a single app
iface = gr.TabbedInterface(
    interface_list=[likes_tab, comments_tab, shares_tab],
    tab_names=["Likes Analytics", "Comments Analytics", "Shares Analytics"]
)

# Launch the interface
iface.launch(share=True)
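
# ---------------------------------------------------------------------------
# For reference, a minimal sketch (kept commented out) of how a file like
# encoded_df.csv could be built. It assumes a hypothetical stories.csv with a
# `story` text column plus the engagement-metric columns; adapt as needed so
# that exactly 7 metric columns follow the embedding columns.
#
# stories = pd.read_csv('stories.csv')
# texts = stories['story'].apply(preprocess_text).tolist()
# embeddings = model.encode(texts)  # shape: (n_stories, 384) for all-MiniLM-L6-v2
# metric_cols = ['likesCount', 'commentCount', 'shareCount']  # plus the other metric columns
# encoded = pd.concat([pd.DataFrame(embeddings), stories[metric_cols]], axis=1)
# encoded.to_csv('encoded_df.csv')
# ---------------------------------------------------------------------------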