Spaces:
Sleeping
Sleeping
File size: 3,696 Bytes
b6bf4ee 57f6880 b6bf4ee 57f6880 b6bf4ee 57f6880 b6bf4ee 2526e11 b6bf4ee 57f6880 2526e11 57f6880 b6bf4ee 2526e11 50da970 2526e11 b6bf4ee 57f6880 2526e11 b6bf4ee 2526e11 57f6880 b6bf4ee 57f6880 2526e11 57f6880 2526e11 57f6880 b6bf4ee 2526e11 5585a4d 2526e11 57f6880 2526e11 b27faf2 b6bf4ee 2526e11 57f6880 2526e11 b6bf4ee |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 |
import gradio as gr
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer
import pandas as pd
import re
# Knowledge base: read the pre-encoded stories from CSV, then discard the
# 'Unnamed: 0' column (presumably a saved index -- confirm against the CSV).
encoded_df = pd.read_csv('encoded_df.csv')
encoded_df = encoded_df.drop(columns=['Unnamed: 0'])

# Sentence Transformer model used to embed stories entered by the user.
model = SentenceTransformer('all-MiniLM-L6-v2')
# Function to preprocess text
def preprocess_text(text):
    """Return *text* lowercased with every non-letter, non-whitespace character removed.

    Only ASCII letters and whitespace survive; digits, punctuation and
    non-ASCII characters are dropped (whitespace is kept as-is, not collapsed).
    """
    return re.sub(r'[^a-zA-Z\s]', '', text.lower())
def generate_graphs(new_story, metric, similarity_threshold=0.85):
    """Build the two comparison plots for a new story.

    The story is preprocessed, embedded with the module-level ``model`` and
    compared (cosine similarity) against every row of ``encoded_df``. Rows
    whose similarity exceeds ``similarity_threshold`` are kept and ordered
    from most to least similar.

    Args:
        new_story: Raw story text entered by the user.
        metric: Name of the ``encoded_df`` column to chart for the similar
            stories (e.g. ``"likesCount"``).
        similarity_threshold: Minimum (exclusive) cosine similarity for a
            knowledge-base story to be included. Defaults to 0.85.

    Returns:
        A ``(similarity_figure, metric_figure)`` tuple of matplotlib figures,
        or ``(None, None)`` when no story passes the threshold.
    """
    # Preprocess and embed the new story.
    new_story = preprocess_text(new_story)
    new_story_vector = model.encode([new_story])[0]

    # Every column except the trailing seven is used as an embedding
    # dimension (the trailing columns are the metrics columns).
    knowledge_base_vectors = encoded_df.iloc[:, :-7].values
    similarities = cosine_similarity([new_story_vector], knowledge_base_vectors)[0]

    # Keep rows above the threshold, ordered by descending similarity so the
    # bar chart really is "most similar to least" as its axis label states.
    similar_indexes = np.where(similarities > similarity_threshold)[0]
    ranking = np.argsort(-similarities[similar_indexes], kind="stable")
    similar_indexes = similar_indexes[ranking]
    print('Similar indexes:', similar_indexes)
    if len(similar_indexes) == 0:
        return None, None

    # Metric values and 1-based row labels, in the same (ranked) order.
    metric_distribution = encoded_df.iloc[similar_indexes][metric].values
    story_labels = [f"Story {i+1}" for i in similar_indexes]

    # Density of embedding values: new story vs. each similar story.
    # Explicit Axes are used instead of pyplot's implicit "current figure",
    # and the deprecated ``shade`` argument is omitted (unfilled is the default).
    sim_dist_plot, sim_ax = plt.subplots(figsize=(10, 6))
    sns.kdeplot(new_story_vector, label="New Story", color='blue', linewidth=2, ax=sim_ax)
    for idx in similar_indexes:
        sns.kdeplot(knowledge_base_vectors[idx], label=f"Story {idx+1}", alpha=0.5, ax=sim_ax)
    sim_ax.set_title(f"Similarity Distribution: New Story vs Similar Stories ({metric})", fontsize=14)
    sim_ax.set_xlabel("Vector Values", fontsize=12)
    sim_ax.set_ylabel("Density", fontsize=12)
    sim_ax.legend(title="Stories")
    sim_dist_plot.tight_layout()

    # Bar chart of the chosen metric for the similar stories.
    metric_dist_plot, metric_ax = plt.subplots(figsize=(10, 6))
    sns.barplot(x=story_labels, y=metric_distribution, palette="viridis", ax=metric_ax)
    metric_ax.set_title(f"{metric} Distribution for Similar Stories", fontsize=14)
    metric_ax.set_xlabel("Story(most similar to least)", fontsize=12)
    metric_ax.set_ylabel(metric, fontsize=12)
    metric_ax.tick_params(axis='x', labelrotation=90)
    metric_dist_plot.tight_layout()

    # Detach the figures from pyplot's global registry so repeated requests
    # do not accumulate open figures; the Figure objects remain usable.
    plt.close(sim_dist_plot)
    plt.close(metric_dist_plot)

    return sim_dist_plot, metric_dist_plot
# Gradio interface for a specific metric
def gradio_interface(new_story, metric):
    """Run ``generate_graphs`` for one metric and hand both plots to Gradio.

    Returns ``(None, None)`` when no similarity plot was produced, otherwise
    the ``(similarity_plot, metric_plot)`` pair.
    """
    similarity_fig, metric_fig = generate_graphs(new_story, metric)
    return (None, None) if similarity_fig is None else (similarity_fig, metric_fig)
# Create the Gradio interface with tabs
def create_tab(metric):
    """Build the ``gr.Interface`` that analyses stories for a single *metric*.

    The interface takes one story textbox and shows two plots: the similarity
    distribution and the distribution of *metric*.
    """
    story_input = gr.Textbox(label="Enter a story", lines=10, placeholder="Enter the story here...")
    plot_outputs = [
        gr.Plot(label="Similarity Distribution"),
        gr.Plot(label=f"{metric} Distribution"),
    ]
    heading = f"Story Similarity and {metric} Analysis"
    blurb = f"Enter a new story to compare with the knowledge base. View similarity distributions and {metric} of stories with similarity > 0.85."

    return gr.Interface(
        # Bind this tab's metric; the UI only supplies the story text.
        fn=lambda new_story: gradio_interface(new_story, metric),
        inputs=story_input,
        outputs=plot_outputs,
        title=heading,
        description=blurb,
    )
# One analysis tab per engagement metric, created in likes/comments/shares order.
likes_tab, comments_tab, shares_tab = (
    create_tab(metric_name)
    for metric_name in ("likesCount", "commentCount", "shareCount")
)

# Combine the tabs into a single tabbed app.
iface = gr.TabbedInterface(
    interface_list=[likes_tab, comments_tab, shares_tab],
    tab_names=["Likes Analytics", "Comments Analytics", "Shares Analytics"],
)

# Start the app, requesting a public share link.
iface.launch(share=True)
|