Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -19,8 +19,7 @@ def preprocess_text(text):
|
|
19 |
text = re.sub(r'[^a-zA-Z\s]', '', text) # Remove special characters
|
20 |
return text
|
21 |
|
22 |
-
|
23 |
-
def generate_graphs(new_story):
|
24 |
# Preprocess the new story
|
25 |
new_story = preprocess_text(new_story)
|
26 |
|
@@ -28,57 +27,70 @@ def generate_graphs(new_story):
|
|
28 |
new_story_vector = model.encode([new_story])[0]
|
29 |
|
30 |
# Calculate similarity with knowledge base stories
|
31 |
-
knowledge_base_vectors = encoded_df.iloc[:, :-7].values # Exclude
|
32 |
similarities = cosine_similarity([new_story_vector], knowledge_base_vectors)[0]
|
33 |
|
34 |
-
# Filter indices with similarity > 0.
|
35 |
similar_indexes = np.where(similarities > 0.85)[0]
|
|
|
36 |
|
37 |
if len(similar_indexes) == 0:
|
38 |
-
return None,
|
39 |
|
40 |
-
# Get
|
41 |
-
|
42 |
story_labels = [f"Story {i+1}" for i in similar_indexes]
|
43 |
|
44 |
# Plot similarity distribution for all similar stories
|
45 |
-
plt.figure(figsize=(10, 6))
|
46 |
sns.kdeplot(new_story_vector, shade=False, label="New Story", color='blue', linewidth=2)
|
47 |
for idx in similar_indexes:
|
48 |
most_similar_vector = encoded_df.iloc[idx, :-7].values
|
49 |
sns.kdeplot(most_similar_vector, shade=False, label=f"Story {idx+1}", alpha=0.5)
|
50 |
-
plt.title("Similarity Distribution: New Story vs Similar Stories", fontsize=14)
|
51 |
plt.xlabel("Vector Values", fontsize=12)
|
52 |
plt.ylabel("Density", fontsize=12)
|
53 |
plt.legend(title="Stories")
|
54 |
-
|
55 |
-
|
56 |
-
# Create a bar graph for
|
57 |
-
plt.figure(figsize=(10, 6))
|
58 |
-
sns.barplot(x=story_labels, y=
|
59 |
-
plt.title("
|
60 |
-
plt.xlabel("Story Index (Similarity > 0.
|
61 |
-
plt.ylabel(
|
62 |
plt.xticks(rotation=90)
|
63 |
-
|
|
|
|
|
64 |
|
65 |
-
return sim_dist_plot, likes_dist_plot
|
66 |
|
67 |
-
# Gradio interface
|
68 |
-
def gradio_interface(new_story):
|
69 |
-
sim_dist_plot,
|
70 |
if sim_dist_plot is None:
|
71 |
-
return
|
72 |
-
return sim_dist_plot,
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
82 |
)
|
83 |
|
84 |
# Launch the interface
|
|
|
19 |
text = re.sub(r'[^a-zA-Z\s]', '', text) # Remove special characters
|
20 |
return text
|
21 |
|
22 |
+
def generate_graphs(new_story, metric, threshold=0.85):
    """Build the similarity and metric plots for a newly entered story.

    The story is cleaned with ``preprocess_text``, embedded with ``model``
    and compared by cosine similarity against the rows of ``encoded_df``
    (all columns except the last seven).

    Args:
        new_story: Raw story text to compare with the knowledge base.
        metric: Name of the ``encoded_df`` column whose values are charted
            for the matching stories.
        threshold: Cosine similarity a knowledge-base story must exceed to
            be included. Defaults to 0.85.

    Returns:
        A ``(sim_dist_plot, metric_dist_plot)`` tuple of matplotlib figures,
        or ``(None, None)`` when no story exceeds ``threshold``.
    """
    # Figures are created directly instead of through pyplot: pyplot keeps
    # every figure it creates in a global registry until it is closed, which
    # leaks memory in a long-running server and shares "current figure"
    # state between concurrent requests.
    from matplotlib.figure import Figure

    # Preprocess the new story
    new_story = preprocess_text(new_story)

    new_story_vector = model.encode([new_story])[0]

    # Calculate similarity with knowledge base stories
    knowledge_base_vectors = encoded_df.iloc[:, :-7].values  # Exclude metrics columns
    similarities = cosine_similarity([new_story_vector], knowledge_base_vectors)[0]

    # Filter indices with similarity above the threshold
    similar_indexes = np.where(similarities > threshold)[0]
    print('Similar indexes:', similar_indexes)

    if len(similar_indexes) == 0:
        return None, None

    # Get metric distribution for the stories that passed the filter
    metric_distribution = encoded_df.iloc[similar_indexes][metric].values
    story_labels = [f"Story {i+1}" for i in similar_indexes]

    # Plot similarity distribution for all similar stories
    sim_dist_plot = Figure(figsize=(10, 6))
    sim_ax = sim_dist_plot.subplots()
    # ``shade=False`` was dropped: the argument is deprecated in seaborn and
    # an unfilled curve is already the default.
    sns.kdeplot(new_story_vector, label="New Story", color='blue', linewidth=2, ax=sim_ax)
    for idx in similar_indexes:
        # Reuse the matrix row computed above rather than slicing the
        # DataFrame again for every matching story.
        sns.kdeplot(knowledge_base_vectors[idx], label=f"Story {idx+1}", alpha=0.5, ax=sim_ax)
    sim_ax.set_title(f"Similarity Distribution: New Story vs Similar Stories ({metric})", fontsize=14)
    sim_ax.set_xlabel("Vector Values", fontsize=12)
    sim_ax.set_ylabel("Density", fontsize=12)
    sim_ax.legend(title="Stories")
    sim_dist_plot.tight_layout()

    # Create a bar graph for the metric distribution
    metric_dist_plot = Figure(figsize=(10, 6))
    metric_ax = metric_dist_plot.subplots()
    sns.barplot(x=story_labels, y=metric_distribution, palette="viridis", ax=metric_ax)
    metric_ax.set_title(f"{metric} Distribution for Similar Stories", fontsize=14)
    metric_ax.set_xlabel(f"Story Index (Similarity > {threshold})", fontsize=12)
    metric_ax.set_ylabel(metric, fontsize=12)
    metric_ax.tick_params(axis='x', rotation=90)
    metric_dist_plot.tight_layout()

    return sim_dist_plot, metric_dist_plot
|
66 |
|
|
|
67 |
|
68 |
+
# Gradio interface for a specific metric
|
69 |
+
def gradio_interface(new_story, metric):
    """Return the pair of plots for one story and one metric.

    Calls ``generate_graphs`` and passes its two plots through. When the
    first plot is ``None``, both outputs are returned as ``None``.
    """
    similarity_fig, metric_fig = generate_graphs(new_story, metric)
    if similarity_fig is not None:
        return similarity_fig, metric_fig
    return None, None
|
74 |
+
|
75 |
+
|
76 |
+
# Create the Gradio interface with tabs
|
77 |
+
def create_tab(metric):
    """Build a ``gr.Interface`` that analyses stories for a single metric.

    The interface takes one text box as input and shows two plots: the
    similarity distribution and the distribution of ``metric``. The handler
    forwards the entered story, together with ``metric``, to
    ``gradio_interface``.
    """
    story_input = gr.Textbox(
        label="Enter a story",
        lines=10,
        placeholder="Enter the story here...",
    )
    plot_outputs = [
        gr.Plot(label="Similarity Distribution"),
        gr.Plot(label=f"{metric} Distribution"),
    ]
    heading = f"Story Similarity and {metric} Analysis"
    blurb = f"Enter a new story to compare with the knowledge base. View similarity distributions and {metric} of stories with similarity > 0.85."

    return gr.Interface(
        fn=lambda new_story: gradio_interface(new_story, metric),
        inputs=story_input,
        outputs=plot_outputs,
        title=heading,
        description=blurb,
    )
|
85 |
+
|
86 |
+
# Build one tab per metric column, in display order.
likes_tab, comments_tab, shares_tab = (
    create_tab(metric_name)
    for metric_name in ("likesCount", "commentCount", "shareCount")
)

# Combine tabs into a single app
iface = gr.TabbedInterface(
    interface_list=[likes_tab, comments_tab, shares_tab],
    tab_names=["Likes Analytics", "Comments Analytics", "Shares Analytics"],
)
|
95 |
|
96 |
# Launch the interface
|