File size: 3,286 Bytes
b6bf4ee
 
 
 
 
 
 
 
 
57f6880
b6bf4ee
 
 
 
57f6880
 
b6bf4ee
57f6880
 
b6bf4ee
 
57f6880
b6bf4ee
 
 
 
 
 
 
57f6880
be6e9e4
57f6880
b6bf4ee
57f6880
50da970
b6bf4ee
57f6880
 
b6bf4ee
57f6880
 
 
b6bf4ee
57f6880
 
 
 
 
 
 
 
b6bf4ee
 
 
57f6880
 
b27faf2
57f6880
 
 
b27faf2
57f6880
b27faf2
 
57f6880
b6bf4ee
 
 
57f6880
 
 
 
b6bf4ee
 
 
57f6880
 
 
 
 
 
b6bf4ee
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
import gradio as gr
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer
import pandas as pd
import re

# Load the knowledge base.
# 'Unnamed: 0' is presumably the index column left behind when the frame was
# written with DataFrame.to_csv; errors='ignore' keeps startup working if the
# CSV is ever regenerated without it.
encoded_df = pd.read_csv('encoded_df.csv').drop(columns=['Unnamed: 0'], errors='ignore')

# Initialize the Sentence Transformer model used to embed incoming stories
model = SentenceTransformer('all-MiniLM-L6-v2')

# Normalise raw text before it is embedded
def preprocess_text(text):
    """Return *text* lowercased, keeping only ASCII letters and whitespace."""
    lowered = text.lower()
    # Digits, punctuation and any non-ASCII characters are removed outright
    # (not replaced by a space), so surrounding whitespace is left untouched.
    return re.sub(r'[^a-zA-Z\s]', '', lowered)

# Function to generate graphs for stories whose similarity exceeds a threshold
def generate_graphs(new_story, similarity_threshold=0.85):
    """Compare a new story with the knowledge base and plot the close matches.

    The story is preprocessed, encoded with the module-level ``model`` and
    compared (cosine similarity) against the vectors stored in ``encoded_df``.

    Args:
        new_story: Raw story text.
        similarity_threshold: Only knowledge-base stories whose cosine
            similarity is strictly greater than this value are plotted.

    Returns:
        ``(sim_dist_plot, likes_dist_plot)`` - two Matplotlib figures - when at
        least one story exceeds the threshold; otherwise ``(None, message)``
        where ``message`` is a string explaining that nothing matched.
    """
    # Preprocess and encode the new story
    new_story_vector = model.encode([preprocess_text(new_story)])[0]

    # Calculate similarity with knowledge base stories.
    # NOTE(review): the last 7 columns are treated as non-embedding columns
    # (presumably metadata that includes 'likesCount') - confirm against the CSV.
    knowledge_base_vectors = encoded_df.iloc[:, :-7].values
    similarities = cosine_similarity([new_story_vector], knowledge_base_vectors)[0]

    # Keep only the rows above the threshold
    similar_indexes = np.where(similarities > similarity_threshold)[0]
    if similar_indexes.size == 0:
        return None, f"No stories have a similarity > {similarity_threshold}."

    # Likes and display labels (1-based) for the matching stories
    likes_distribution = encoded_df.iloc[similar_indexes]['likesCount'].values
    story_labels = [f"Story {i+1}" for i in similar_indexes]

    # Density of embedding values: new story vs. each similar story.
    # Explicit axes are used instead of pyplot's implicit "current figure" so
    # that overlapping requests cannot draw onto each other's figures.
    sim_dist_plot, sim_ax = plt.subplots(figsize=(10, 6))
    sns.kdeplot(new_story_vector, label="New Story", color='blue', linewidth=2, ax=sim_ax)
    for idx in similar_indexes:
        # Reuse the already-extracted matrix row rather than re-slicing the frame
        sns.kdeplot(knowledge_base_vectors[idx], label=f"Story {idx+1}", alpha=0.5, ax=sim_ax)
    sim_ax.set_title("Similarity Distribution: New Story vs Similar Stories", fontsize=14)
    sim_ax.set_xlabel("Vector Values", fontsize=12)
    sim_ax.set_ylabel("Density", fontsize=12)
    sim_ax.legend(title="Stories")

    # Bar graph of likes for the matching stories
    likes_dist_plot, likes_ax = plt.subplots(figsize=(10, 6))
    sns.barplot(x=story_labels, y=likes_distribution, palette="viridis", ax=likes_ax)
    likes_ax.set_title("LikesCount Distribution for Similar Stories", fontsize=14)
    likes_ax.set_xlabel(f"Story Index (Similarity > {similarity_threshold})", fontsize=12)
    likes_ax.set_ylabel("LikesCount", fontsize=12)
    likes_ax.tick_params(axis='x', labelrotation=90)

    # pyplot keeps every figure it creates alive until it is closed; detach
    # both so they do not accumulate across calls. The Figure objects remain
    # valid for the caller to render.
    plt.close(sim_dist_plot)
    plt.close(likes_dist_plot)

    return sim_dist_plot, likes_dist_plot

# Gradio interface
def gradio_interface(new_story):
    """Gradio callback: return the two figures for *new_story*.

    Args:
        new_story: Story text entered by the user.

    Returns:
        ``(sim_dist_plot, likes_dist_plot)`` as produced by ``generate_graphs``.

    Raises:
        gr.Error: If ``generate_graphs`` found no sufficiently similar story.
    """
    sim_dist_plot, likes_or_message = generate_graphs(new_story)
    if sim_dist_plot is None:
        # Both outputs of the interface are gr.Plot components, which cannot
        # display a string. Surface the explanation returned by
        # generate_graphs as a Gradio error message instead.
        raise gr.Error(likes_or_message)
    return sim_dist_plot, likes_or_message

# Create the Gradio interface: one text input, two plot outputs
iface = gr.Interface(
    fn=gradio_interface,
    inputs=gr.Textbox(label="Enter a story", lines=10, placeholder="Enter the story here..."),
    outputs=[gr.Plot(label="Similarity Distribution"), gr.Plot(label="Likes Distribution")],
    title="Story Similarity and Likes Analysis",
    # The description states 0.85 because that is the cut-off generate_graphs applies.
    description="Enter a new story to compare with the knowledge base. "
                "View similarity distributions and likes of stories with similarity > 0.85."
)

# Launch the interface; share=True asks Gradio to also create a public share link
iface.launch(share=True)