Spaces:

DvorakInnovationAI
/

Story-Analytics

Sleeping

App Files Files Community

subashdvorak committed on Jan 21

Commit

b6bf4ee

verified ·

1 Parent(s): 609a2e1

Create app.py

Browse files

Files changed (1) hide show

app.py +84 -0

app.py ADDED Viewed

	@@ -0,0 +1,84 @@

+import gradio as gr
+import numpy as np
+import seaborn as sns
+import matplotlib.pyplot as plt
+from sklearn.metrics.pairwise import cosine_similarity
+from sentence_transformers import SentenceTransformer
+import pandas as pd
+import re
# Knowledge base of previously encoded stories, loaded once at start-up.
encoded_df = pd.read_csv('encoded_df.csv')
# 'Unnamed: 0' is presumably the index column written when the CSV was
# saved — TODO confirm; it is discarded before the frame is used.
encoded_df = encoded_df.drop(columns=['Unnamed: 0'])
# Sentence-embedding model shared by every request.
model = SentenceTransformer('all-MiniLM-L6-v2')
+# Function to preprocess text (if required)
def preprocess_text(text):
    """Normalize a story before it is embedded.

    Lowercases the text and removes every character that is not an ASCII
    letter or whitespace. Digits, punctuation and accented letters are
    dropped; whitespace (including newlines) is left untouched.

    Args:
        text: Raw story text. ``None`` is treated as an empty string.

    Returns:
        The cleaned, lowercased string.
    """
    if text is None:
        return ''
    # str.lower() returns a new string, so the result must be kept.
    text = text.lower()
    text = re.sub(r'[^a-zA-Z\s]', '', text)
    return text
+# Function to generate the graphs
def generate_graphs(new_story, top_k=5):
    """Build the two analytics figures for a newly entered story.

    The story is cleaned with ``preprocess_text``, embedded with the
    module-level ``model`` and compared by cosine similarity against every
    row of ``encoded_df``. The ``top_k`` closest rows are then plotted.

    Args:
        new_story: Raw story text entered by the user.
        top_k: Number of most similar stories to plot. Capped at the number
            of rows in the knowledge base. Defaults to 5.

    Returns:
        A ``(sim_dist_plot, likes_dist_plot)`` tuple of matplotlib figures:
        first the KDE of the embedding values of the new story and its
        nearest stories, then the bar chart of their ``likesCount``.

    Raises:
        ValueError: If ``top_k`` is smaller than 1.
    """
    # Figures are created with the object-oriented API instead of pyplot so
    # they are not registered in pyplot's global figure list. This function
    # runs once per request, and pyplot figures that are never closed would
    # accumulate for the lifetime of the server.
    from matplotlib.figure import Figure

    if top_k < 1:
        raise ValueError("top_k must be at least 1")

    new_story = preprocess_text(new_story)
    new_story_vector = model.encode([new_story])[0]

    # NOTE(review): assumes 'likesCount' is the last column and every other
    # column holds one embedding dimension — confirm against encoded_df.csv.
    knowledge_base_vectors = encoded_df.iloc[:, :-1].values
    print(f"New Story Vector Shape: {new_story_vector.shape}")
    print(f"Knowledge Base Vector Shape: {knowledge_base_vectors.shape}")

    similarities = cosine_similarity([new_story_vector], knowledge_base_vectors)[0]

    # Row positions of the closest stories, most similar first. Capping at
    # the number of rows keeps the plots valid for small knowledge bases.
    top_k = min(top_k, len(similarities))
    top_indices = np.argsort(similarities)[::-1][:top_k]
    shown = len(top_indices)

    # Likes are kept in similarity order (not re-sorted by value) so that
    # "Story k" means the k-th most similar story in both figures and the
    # x-axis label below is accurate.
    likes_by_rank = encoded_df.iloc[top_indices]['likesCount'].values
    rank_labels = [f"Story {rank}" for rank in range(1, shown + 1)]

    likes_dist_plot = Figure(figsize=(10, 6))
    likes_ax = likes_dist_plot.subplots()
    sns.barplot(x=rank_labels, y=likes_by_rank, palette="viridis", ax=likes_ax)
    likes_ax.set_title(f"LikesCount Distribution for the {shown} Most Similar Stories", fontsize=14)
    likes_ax.set_xlabel("Story Similarity (Most Similar to Least)", fontsize=12)
    likes_ax.set_ylabel("LikesCount", fontsize=12)

    sim_dist_plot = Figure(figsize=(10, 6))
    sim_ax = sim_dist_plot.subplots()
    # The vector is passed directly (not wrapped in a list) so seaborn treats
    # it as a single sample; `fill` is the current name of the old `shade`.
    sns.kdeplot(new_story_vector, fill=False, label="New Story", color='blue', ax=sim_ax)
    for rank, row in enumerate(top_indices, start=1):
        sns.kdeplot(
            knowledge_base_vectors[row],
            fill=False,
            label=f"Most Similar Story: {rank}",
            alpha=0.5,
            ax=sim_ax,
        )
    sim_ax.set_title(f"Similarity Distribution of New Story and Top {shown} Similar Stories", fontsize=14)
    sim_ax.set_xlabel("Value", fontsize=12)
    sim_ax.set_ylabel("Density", fontsize=12)
    sim_ax.legend(title="Stories")

    return sim_dist_plot, likes_dist_plot
+# Gradio interface
def gradio_interface(new_story):
    """Gradio callback: run the analysis for ``new_story``.

    Returns the two figures exactly as ``generate_graphs`` yields them,
    in the same order.
    """
    # generate_graphs returns the similarity figure first, then the likes figure.
    sim_dist_plot, likes_dist_plot = generate_graphs(new_story)
    return sim_dist_plot, likes_dist_plot
+# Create the Gradio interface
iface = gr.Interface(
    fn=gradio_interface,
    inputs=gr.Textbox(label="Enter a story", lines=10, placeholder="Enter the story here..."),
    # gradio_interface returns the similarity figure first and the likes
    # figure second; the panels are labelled so users can tell them apart.
    outputs=[
        gr.Plot(label="Similarity Distribution"),
        gr.Plot(label="LikesCount Distribution"),
    ],
    title="Story Similarity and Likes Distribution",
    description="Enter a new story to compare it with the knowledge base and get analytics on similarity and likes distribution of the most similar stories.",
)
# Launch the interface; share=True additionally requests a public share link.
iface.launch(share=True)