# Source: Hugging Face Space (status when captured: Sleeping)
import gradio as gr | |
import numpy as np | |
import seaborn as sns | |
import matplotlib.pyplot as plt | |
from sklearn.metrics.pairwise import cosine_similarity | |
from sentence_transformers import SentenceTransformer | |
import pandas as pd | |
import re | |
# Knowledge base: read the CSV, then discard the 'Unnamed: 0' column
# (presumably a leftover index written by an earlier DataFrame.to_csv -- TODO confirm).
encoded_df = pd.read_csv('encoded_df.csv')
encoded_df = encoded_df.drop(columns=['Unnamed: 0'])

# Sentence Transformer model used to embed incoming stories
model = SentenceTransformer('all-MiniLM-L6-v2')
# Function to preprocess text
def preprocess_text(text: str) -> str:
    """Normalize *text* before it is embedded.

    The text is lowercased and every character that is neither an ASCII
    letter nor whitespace is removed. Note that this drops digits and
    non-ASCII letters as well as punctuation. Whitespace is left untouched
    (it is not collapsed or trimmed).

    Args:
        text: Raw story text.

    Returns:
        The lowercased text containing only ``a-z`` and whitespace.
    """
    text = text.lower()  # Lowercase
    text = re.sub(r'[^a-zA-Z\s]', '', text)  # Keep only ASCII letters and whitespace
    return text
# Function to generate graphs for stories whose similarity exceeds a threshold
def generate_graphs(new_story, threshold=0.85):
    """Compare a new story with the knowledge base and plot its close matches.

    The story is preprocessed, encoded with the module-level ``model`` and
    compared (cosine similarity) against the vectors stored in ``encoded_df``.
    Stories whose similarity is strictly greater than ``threshold`` are shown
    in two figures: a density plot of the embedding values and a bar chart of
    their ``likesCount``.

    Args:
        new_story: Raw text of the story to analyse.
        threshold: Minimum (exclusive) cosine similarity for a knowledge-base
            story to be included. Defaults to 0.85.

    Returns:
        ``(similarity_figure, likes_figure)`` when at least one story matches,
        otherwise ``(None, message)`` where ``message`` explains that nothing
        exceeded the threshold.
    """
    # Preprocess and encode the new story
    new_story = preprocess_text(new_story)
    new_story_vector = model.encode([new_story])[0]

    # All columns except the trailing seven are used as the stored vectors.
    # NOTE(review): presumably those seven are metadata columns such as
    # 'likesCount' -- confirm against the CSV schema.
    knowledge_base_vectors = encoded_df.iloc[:, :-7].values
    similarities = cosine_similarity([new_story_vector], knowledge_base_vectors)[0]

    # Positions of the stories that are similar enough
    similar_indexes = np.where(similarities > threshold)[0]
    if len(similar_indexes) == 0:
        return None, f"No stories have a similarity > {threshold}."

    likes_distribution = encoded_df.iloc[similar_indexes]['likesCount'].values
    story_labels = [f"Story {i+1}" for i in similar_indexes]

    # Explicit figure/axes objects are used instead of plt.figure()/plt.gcf()
    # so that nothing depends on pyplot's global "current figure".
    sim_dist_plot, sim_ax = plt.subplots(figsize=(10, 6))
    # The deprecated `shade=False` argument is omitted: no fill is the default.
    sns.kdeplot(new_story_vector, label="New Story", color='blue', linewidth=2, ax=sim_ax)
    for idx in similar_indexes:
        # Reuse the matrix extracted above rather than slicing the frame again
        sns.kdeplot(knowledge_base_vectors[idx], label=f"Story {idx+1}", alpha=0.5, ax=sim_ax)
    sim_ax.set_title("Similarity Distribution: New Story vs Similar Stories", fontsize=14)
    sim_ax.set_xlabel("Vector Values", fontsize=12)
    sim_ax.set_ylabel("Density", fontsize=12)
    sim_ax.legend(title="Stories")

    # Bar graph of likesCount for the matching stories
    likes_dist_plot, likes_ax = plt.subplots(figsize=(10, 6))
    sns.barplot(x=story_labels, y=likes_distribution, palette="viridis", ax=likes_ax)
    likes_ax.set_title("LikesCount Distribution for Similar Stories", fontsize=14)
    likes_ax.set_xlabel(f"Story Index (Similarity > {threshold})", fontsize=12)
    likes_ax.set_ylabel("LikesCount", fontsize=12)
    likes_ax.tick_params(axis='x', labelrotation=90)

    # Unregister the figures from pyplot so repeated calls do not accumulate
    # open figures; the Figure objects themselves remain usable by the caller.
    plt.close(sim_dist_plot)
    plt.close(likes_dist_plot)

    return sim_dist_plot, likes_dist_plot
# Gradio interface
def gradio_interface(new_story):
    """Gradio callback: build the two figures for ``new_story``.

    Args:
        new_story: Story text entered in the textbox.

    Returns:
        ``(similarity_figure, likes_figure)`` for the two ``gr.Plot`` outputs.

    Raises:
        gr.Error: If no knowledge-base story is similar enough. Both outputs
            are plot components and cannot display a text message, so the
            explanation is surfaced as an error instead of being returned.
    """
    sim_dist_plot, likes_dist_plot = generate_graphs(new_story)
    if sim_dist_plot is None:
        # In the no-match case generate_graphs returns (None, message);
        # reuse its message so the reported threshold matches the filter.
        raise gr.Error(str(likes_dist_plot))
    return sim_dist_plot, likes_dist_plot
# Create the Gradio interface: one multi-line textbox in, two plots out
iface = gr.Interface(
    fn=gradio_interface,
    inputs=gr.Textbox(label="Enter a story", lines=10, placeholder="Enter the story here..."),
    outputs=[gr.Plot(label="Similarity Distribution"), gr.Plot(label="Likes Distribution")],
    title="Story Similarity and Likes Analysis",
    # The description quotes the 0.85 threshold actually applied by generate_graphs
    description="Enter a new story to compare with the knowledge base. "
                "View similarity distributions and likes of stories with similarity > 0.85.",
)
# Launch the interface (share=True requests a public share link)
iface.launch(share=True)