|
import gradio as gr |
|
import numpy as np |
|
import seaborn as sns |
|
import matplotlib.pyplot as plt |
|
from sklearn.metrics.pairwise import cosine_similarity |
|
from sentence_transformers import SentenceTransformer |
|
import pandas as pd |
|
import re |
|
|
|
|
|
# Load the knowledge base and discard its 'Unnamed: 0' column
# (presumably a row index written by an earlier export -- TODO confirm).
encoded_df = pd.read_csv('encoded_df.csv')
encoded_df = encoded_df.drop(columns=['Unnamed: 0'])

# Sentence-embedding model used to encode each story typed into the UI.
model = SentenceTransformer('all-MiniLM-L6-v2')
|
|
|
|
|
def preprocess_text(text):
    """Normalize raw story text before it is embedded.

    The text is lower-cased and every character that is not an ASCII
    letter or whitespace is removed (digits, punctuation, accented
    letters, ...). Removed characters are deleted, not replaced by a
    space, so surrounding whitespace is left exactly as it was.

    Args:
        text: Raw story text, or ``None``.

    Returns:
        The cleaned text; an empty string when ``text`` is ``None``.
    """
    # Guard against a missing value so the caller gets an empty story
    # instead of an AttributeError from ``None.lower()``.
    if text is None:
        return ""
    return re.sub(r'[^a-zA-Z\s]', '', text.lower())
|
|
|
|
|
def generate_graphs(new_story, threshold=0.8):
    """Plot how a new story compares with similar stories in the knowledge base.

    The story is cleaned with ``preprocess_text``, embedded with the
    module-level ``model`` and compared by cosine similarity against the
    vectors stored in ``encoded_df`` (every column except the last seven).

    Args:
        new_story: Raw story text.
        threshold: Similarity a stored story must exceed (strictly) to be
            treated as similar. Defaults to 0.8.

    Returns:
        ``(similarity_figure, likes_figure)`` when at least one stored story
        exceeds ``threshold``; otherwise ``(None, message)`` where
        ``message`` explains that nothing matched.
    """
    new_story_vector = model.encode([preprocess_text(new_story)])[0]

    # All columns but the last seven hold the stored story vectors.
    knowledge_base_vectors = encoded_df.iloc[:, :-7].values
    similarities = cosine_similarity([new_story_vector], knowledge_base_vectors)[0]

    similar_indexes = np.flatnonzero(similarities > threshold)
    if similar_indexes.size == 0:
        # Report the threshold actually used, not a hard-coded number.
        return None, f"No stories have a similarity > {threshold}."

    likes_distribution = encoded_df.iloc[similar_indexes]['likesCount'].values
    # Labels are the 1-based row positions of the matching stories.
    story_labels = [f"Story {i + 1}" for i in similar_indexes]

    # Figure 1: density of vector values, new story vs. each similar story.
    # Explicit fig/ax objects are used instead of pyplot's implicit
    # "current figure" so the two plots cannot be mixed up.
    sim_dist_plot, sim_ax = plt.subplots(figsize=(10, 6))
    # The deprecated ``shade=False`` argument is omitted; unfilled curves
    # are already the default.
    sns.kdeplot(new_story_vector, label="New Story", color='blue', linewidth=2, ax=sim_ax)
    # Reuse the rows already extracted above rather than re-slicing the
    # DataFrame on every iteration.
    for label, story_vector in zip(story_labels, knowledge_base_vectors[similar_indexes]):
        sns.kdeplot(story_vector, label=label, alpha=0.5, ax=sim_ax)
    sim_ax.set_title("Similarity Distribution: New Story vs Similar Stories", fontsize=14)
    sim_ax.set_xlabel("Vector Values", fontsize=12)
    sim_ax.set_ylabel("Density", fontsize=12)
    sim_ax.legend(title="Stories")

    # Figure 2: likes of each similar story.
    likes_dist_plot, likes_ax = plt.subplots(figsize=(10, 6))
    sns.barplot(x=story_labels, y=likes_distribution, palette="viridis", ax=likes_ax)
    likes_ax.set_title("LikesCount Distribution for Similar Stories", fontsize=14)
    likes_ax.set_xlabel(f"Story Index (Similarity > {threshold})", fontsize=12)
    likes_ax.set_ylabel("LikesCount", fontsize=12)
    likes_ax.tick_params(axis='x', labelrotation=90)

    # Release pyplot's references so figures do not pile up across requests.
    # The returned Figure objects themselves remain usable by the caller.
    plt.close(sim_dist_plot)
    plt.close(likes_dist_plot)

    return sim_dist_plot, likes_dist_plot
|
|
|
|
|
def gradio_interface(new_story):
    """Gradio callback: turn a submitted story into the two output plots.

    Args:
        new_story: Story text entered in the UI.

    Returns:
        ``(similarity_figure, likes_figure)`` as produced by
        ``generate_graphs``.

    Raises:
        gr.Error: When ``generate_graphs`` finds no sufficiently similar
            story, so the message is shown in the UI.
    """
    sim_dist_plot, likes_dist_plot = generate_graphs(new_story)
    if sim_dist_plot is None:
        # Both outputs are gr.Plot components, which expect a figure (or
        # None) rather than a plain string, so the "no match" message is
        # surfaced as a Gradio error instead of being returned.
        raise gr.Error("No stories have a similarity > 0.8.")
    return sim_dist_plot, likes_dist_plot
|
|
|
|
|
# Input widget: a multi-line text box for the story.
story_input = gr.Textbox(
    label="Enter a story",
    lines=10,
    placeholder="Enter the story here...",
)

# Output widgets: one plot per figure returned by gradio_interface.
plot_outputs = [
    gr.Plot(label="Similarity Distribution"),
    gr.Plot(label="Likes Distribution"),
]

iface = gr.Interface(
    fn=gradio_interface,
    inputs=story_input,
    outputs=plot_outputs,
    title="Story Similarity and Likes Analysis",
    description=(
        "Enter a new story to compare with the knowledge base. "
        "View similarity distributions and likes of stories with similarity > 0.8."
    ),
)

# Start the app as soon as the module runs; share=True asks Gradio for a
# shareable link.
iface.launch(share=True)
|
|