"""Gradio app that compares a new story against a knowledge base of stories.

For each engagement metric (likes, comments, shares) the app shows two plots:
a kernel-density plot of the new story's embedding values next to those of
the similar knowledge-base stories, and a bar chart of the chosen metric for
those similar stories.
"""

import re

import gradio as gr
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

# Minimum cosine similarity for a knowledge-base story to count as "similar".
# NOTE(review): the original comments and UI text said 0.85 while the code
# filtered at 0.7; the code's value is kept and the text now follows it.
SIMILARITY_THRESHOLD = 0.7

# Number of trailing columns in the CSV that are excluded from the embedding
# (the original code calls these the "metrics columns").
NUM_METRIC_COLUMNS = 7

# Load the knowledge base
encoded_df = pd.read_csv('encoded_df.csv').drop(columns=['Unnamed: 0'])

# Embedding matrix of the knowledge base, sliced once instead of per request.
_knowledge_base_vectors = encoded_df.iloc[:, :-NUM_METRIC_COLUMNS].values

# Initialize the Sentence Transformer model
model = SentenceTransformer('all-MiniLM-L6-v2')


def preprocess_text(text):
    """Lowercase *text* and drop every character that is not a letter or whitespace."""
    text = text.lower()
    return re.sub(r'[^a-zA-Z\s]', '', text)


def _find_similar_stories(story_vector, threshold):
    """Return knowledge-base row positions above *threshold*, most similar first."""
    similarities = cosine_similarity([story_vector], _knowledge_base_vectors)[0]
    matches = np.flatnonzero(similarities > threshold)
    # Stable sort on the negated scores: descending similarity, ties keep
    # their dataset order.
    order = np.argsort(-similarities[matches], kind="stable")
    return matches[order]


def _plot_similarity_distribution(story_vector, similar_indexes, metric):
    """Draw KDE curves of the new story's vector and each similar story's vector."""
    fig, ax = plt.subplots(figsize=(10, 6))
    sns.kdeplot(story_vector, label="New Story", color='blue', linewidth=2, ax=ax)
    for idx in similar_indexes:
        sns.kdeplot(
            _knowledge_base_vectors[idx],
            label=f"Story {idx+1}",
            alpha=0.5,
            ax=ax,
        )
    ax.set_title(
        f"Similarity Distribution: New Story vs Similar Stories ({metric})",
        fontsize=14,
    )
    ax.set_xlabel("Vector Values", fontsize=12)
    ax.set_ylabel("Density", fontsize=12)
    ax.legend(title="Stories")
    fig.tight_layout()
    return fig


def _plot_metric_distribution(story_labels, metric_values, metric):
    """Draw a bar chart of *metric* for the similar stories, in the given order."""
    fig, ax = plt.subplots(figsize=(10, 6))
    # Assigning hue (= x) is the supported way to colour bars by palette;
    # dodge=False keeps one full-width bar per story.
    sns.barplot(
        x=story_labels,
        y=metric_values,
        hue=story_labels,
        palette="viridis",
        dodge=False,
        ax=ax,
    )
    # The hue legend would only repeat the x tick labels.
    legend = ax.get_legend()
    if legend is not None:
        legend.remove()
    ax.set_title(f"{metric} Distribution for Similar Stories", fontsize=14)
    ax.set_xlabel("Story(most similar to least)", fontsize=12)
    ax.set_ylabel(metric, fontsize=12)
    ax.tick_params(axis="x", labelrotation=90)
    fig.tight_layout()
    return fig


def generate_graphs(new_story, metric, threshold=SIMILARITY_THRESHOLD):
    """Build the similarity and metric plots for *new_story*.

    Args:
        new_story: Raw story text entered by the user.
        metric: Name of the ``encoded_df`` column to chart (e.g. "likesCount").
        threshold: Minimum cosine similarity (exclusive) for a knowledge-base
            story to be included.

    Returns:
        A ``(similarity_figure, metric_figure)`` tuple of matplotlib figures,
        or ``(None, None)`` when the input has no usable text or no
        knowledge-base story exceeds *threshold*.
    """
    cleaned_story = preprocess_text(new_story or "")
    if not cleaned_story.strip():
        # Nothing left to encode after preprocessing.
        return None, None

    new_story_vector = model.encode([cleaned_story])[0]

    similar_indexes = _find_similar_stories(new_story_vector, threshold)
    print('Similar indexes:', similar_indexes)
    if len(similar_indexes) == 0:
        return None, None

    metric_distribution = encoded_df.iloc[similar_indexes][metric].values
    story_labels = [f"Story {i+1}" for i in similar_indexes]

    sim_dist_plot = _plot_similarity_distribution(
        new_story_vector, similar_indexes, metric
    )
    metric_dist_plot = _plot_metric_distribution(
        story_labels, metric_distribution, metric
    )

    # Drop the figures from pyplot's global registry so a long-running server
    # does not accumulate them; the Figure objects remain renderable.
    plt.close(sim_dist_plot)
    plt.close(metric_dist_plot)
    return sim_dist_plot, metric_dist_plot


def gradio_interface(new_story, metric):
    """Gradio callback: return the two plots for *new_story* and *metric*.

    Returns ``(None, None)`` when ``generate_graphs`` produces no plots.
    """
    return generate_graphs(new_story, metric)


def create_tab(metric):
    """Create the Gradio interface (one tab) that analyses a single *metric*."""
    return gr.Interface(
        fn=lambda new_story: gradio_interface(new_story, metric),
        inputs=gr.Textbox(
            label="Enter a story",
            lines=10,
            placeholder="Enter the story here...",
        ),
        outputs=[
            gr.Plot(label="Similarity Distribution"),
            gr.Plot(label=f"{metric} Distribution"),
        ],
        title=f"Story Similarity and {metric} Analysis",
        description=(
            "Enter a new story to compare with the knowledge base. "
            f"View similarity distributions and {metric} of stories "
            f"with similarity > {SIMILARITY_THRESHOLD}."
        ),
    )


likes_tab = create_tab("likesCount")
comments_tab = create_tab("commentCount")
shares_tab = create_tab("shareCount")

# Combine tabs into a single app
iface = gr.TabbedInterface(
    interface_list=[likes_tab, comments_tab, shares_tab],
    tab_names=["Likes Analytics", "Comments Analytics", "Shares Analytics"],
)

# Launch the interface only when run as a script, not on import.
if __name__ == "__main__":
    iface.launch(share=True)