Spaces:
Sleeping
Sleeping
File size: 3,286 Bytes
b6bf4ee 57f6880 b6bf4ee 57f6880 b6bf4ee 57f6880 b6bf4ee 57f6880 b6bf4ee 57f6880 be6e9e4 57f6880 b6bf4ee 57f6880 50da970 b6bf4ee 57f6880 b6bf4ee 57f6880 b6bf4ee 57f6880 b6bf4ee 57f6880 b27faf2 57f6880 b27faf2 57f6880 b27faf2 57f6880 b6bf4ee 57f6880 b6bf4ee 57f6880 b6bf4ee |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 |
import gradio as gr
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer
import pandas as pd
import re
# Knowledge base: one row per stored story, read from a CSV that sits next to
# this script. The 'Unnamed: 0' column is dropped right after loading
# (presumably a saved index column - verify against how the CSV was written).
encoded_df = (
    pd.read_csv('encoded_df.csv')
    .drop(columns=['Unnamed: 0'])
)

# Sentence-embedding model used to encode newly submitted stories.
model = SentenceTransformer('all-MiniLM-L6-v2')
# Text normalisation applied to a story before it is embedded
def preprocess_text(text):
    """Return ``text`` lowercased with non-letter characters removed.

    Every character that is neither an ASCII letter nor whitespace (digits,
    punctuation, symbols, non-ASCII letters) is deleted; whitespace is kept
    exactly as it was.
    """
    return re.sub(r'[^a-zA-Z\s]', '', text.lower())
# Function to generate graphs for stories whose similarity exceeds a threshold
def generate_graphs(new_story, threshold=0.85):
    """Plot how a new story compares with similar knowledge-base stories.

    The story is cleaned with ``preprocess_text``, embedded with the
    module-level ``model`` and compared by cosine similarity against every
    row of ``encoded_df``. Rows scoring strictly above ``threshold`` are
    kept and drawn.

    Args:
        new_story: Raw story text entered by the user.
        threshold: Exclusive lower bound on cosine similarity for a stored
            story to be included. Defaults to 0.85, the value that used to
            be hard-coded.

    Returns:
        ``(similarity_figure, likes_figure)`` when at least one stored story
        matches, otherwise ``(None, message)`` where ``message`` is a
        human-readable explanation that nothing exceeded the threshold.
    """
    # Figures are built directly instead of through pyplot: pyplot keeps every
    # figure it creates alive in a global registry, which leaks memory when
    # this function is called repeatedly by a long-running server.
    from matplotlib.figure import Figure

    # Preprocess and encode the new story
    cleaned_story = preprocess_text(new_story)
    new_story_vector = model.encode([cleaned_story])[0]

    # All but the last seven columns are used as the stored embeddings
    # (presumably the trailing columns are metadata such as 'likesCount' -
    # verify against the CSV schema).
    knowledge_base_vectors = encoded_df.iloc[:, :-7].values
    similarities = cosine_similarity([new_story_vector], knowledge_base_vectors)[0]

    # Positions of the stored stories that are similar enough
    similar_indexes = np.where(similarities > threshold)[0]
    if similar_indexes.size == 0:
        return None, f"No stories have a similarity > {threshold}."

    likes_distribution = encoded_df.iloc[similar_indexes]['likesCount'].values
    # Labels are 1-based so the first knowledge-base row is "Story 1"
    story_labels = [f"Story {i+1}" for i in similar_indexes]

    # Density of embedding values: new story vs each similar story
    sim_dist_plot = Figure(figsize=(10, 6))
    sim_ax = sim_dist_plot.subplots()
    sns.kdeplot(new_story_vector, fill=False, label="New Story",
                color='blue', linewidth=2, ax=sim_ax)
    for idx in similar_indexes:
        sns.kdeplot(knowledge_base_vectors[idx], fill=False,
                    label=f"Story {idx+1}", alpha=0.5, ax=sim_ax)
    sim_ax.set_title("Similarity Distribution: New Story vs Similar Stories", fontsize=14)
    sim_ax.set_xlabel("Vector Values", fontsize=12)
    sim_ax.set_ylabel("Density", fontsize=12)
    sim_ax.legend(title="Stories")

    # Bar chart of likes for the similar stories
    likes_dist_plot = Figure(figsize=(10, 6))
    likes_ax = likes_dist_plot.subplots()
    sns.barplot(x=story_labels, y=likes_distribution, palette="viridis", ax=likes_ax)
    likes_ax.set_title("LikesCount Distribution for Similar Stories", fontsize=14)
    likes_ax.set_xlabel(f"Story Index (Similarity > {threshold})", fontsize=12)
    likes_ax.set_ylabel("LikesCount", fontsize=12)
    likes_ax.tick_params(axis="x", labelrotation=90)

    return sim_dist_plot, likes_dist_plot
# Gradio interface
def gradio_interface(new_story):
    """Gradio callback that turns a submitted story into the two plots.

    Args:
        new_story: Story text from the input textbox.

    Returns:
        ``(similarity_figure, likes_figure)`` for the two plot outputs.

    Raises:
        gr.Error: If no knowledge-base story is similar enough. Both outputs
            are plot components, which cannot display a plain string, so the
            explanation is surfaced as a Gradio error message instead.
    """
    sim_dist_plot, likes_or_message = generate_graphs(new_story)
    if sim_dist_plot is None:
        # On "no match" generate_graphs returns (None, message); reuse its
        # message so the threshold shown to the user is the one applied.
        raise gr.Error(str(likes_or_message))
    return sim_dist_plot, likes_or_message
# Create the Gradio interface: one multi-line textbox in, two plots out.
iface = gr.Interface(
    fn=gradio_interface,
    inputs=gr.Textbox(label="Enter a story", lines=10, placeholder="Enter the story here..."),
    outputs=[gr.Plot(label="Similarity Distribution"), gr.Plot(label="Likes Distribution")],
    title="Story Similarity and Likes Analysis",
    # The description quotes 0.85 because that is the similarity cut-off the
    # analysis applies.
    description="Enter a new story to compare with the knowledge base. "
                "View similarity distributions and likes of stories with similarity > 0.85."
)

# Launch the interface
iface.launch(share=True)
|