subashdvorak committed on
Commit
2526e11
·
verified ·
1 Parent(s): 50da970

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +45 -33
app.py CHANGED
@@ -19,8 +19,7 @@ def preprocess_text(text):
19
  text = re.sub(r'[^a-zA-Z\s]', '', text) # Remove special characters
20
  return text
21
 
22
- # Function to generate graphs for stories with similarity > 0.8
23
- def generate_graphs(new_story):
24
  # Preprocess the new story
25
  new_story = preprocess_text(new_story)
26
 
@@ -28,57 +27,70 @@ def generate_graphs(new_story):
28
  new_story_vector = model.encode([new_story])[0]
29
 
30
  # Calculate similarity with knowledge base stories
31
- knowledge_base_vectors = encoded_df.iloc[:, :-7].values # Exclude 'likesCount'
32
  similarities = cosine_similarity([new_story_vector], knowledge_base_vectors)[0]
33
 
34
- # Filter indices with similarity > 0.8
35
  similar_indexes = np.where(similarities > 0.85)[0]
 
36
 
37
  if len(similar_indexes) == 0:
38
- return None, "No stories have a similarity > 0.85."
39
 
40
- # Get likesCount for stories with similarity > 0.8
41
- likes_distribution = encoded_df.iloc[similar_indexes]['likesCount'].values
42
  story_labels = [f"Story {i+1}" for i in similar_indexes]
43
 
44
  # Plot similarity distribution for all similar stories
45
- plt.figure(figsize=(10, 6))
46
  sns.kdeplot(new_story_vector, shade=False, label="New Story", color='blue', linewidth=2)
47
  for idx in similar_indexes:
48
  most_similar_vector = encoded_df.iloc[idx, :-7].values
49
  sns.kdeplot(most_similar_vector, shade=False, label=f"Story {idx+1}", alpha=0.5)
50
- plt.title("Similarity Distribution: New Story vs Similar Stories", fontsize=14)
51
  plt.xlabel("Vector Values", fontsize=12)
52
  plt.ylabel("Density", fontsize=12)
53
  plt.legend(title="Stories")
54
- sim_dist_plot = plt.gcf()
55
-
56
- # Create a bar graph for likes distribution
57
- plt.figure(figsize=(10, 6))
58
- sns.barplot(x=story_labels, y=likes_distribution, palette="viridis")
59
- plt.title("LikesCount Distribution for Similar Stories", fontsize=14)
60
- plt.xlabel("Story Index (Similarity > 0.8)", fontsize=12)
61
- plt.ylabel("LikesCount", fontsize=12)
62
  plt.xticks(rotation=90)
63
- likes_dist_plot = plt.gcf()
 
 
64
 
65
- return sim_dist_plot, likes_dist_plot
66
 
67
- # Gradio interface
68
- def gradio_interface(new_story):
69
- sim_dist_plot, likes_dist_plot = generate_graphs(new_story)
70
  if sim_dist_plot is None:
71
- return "No stories have a similarity > 0.8.", None
72
- return sim_dist_plot, likes_dist_plot
73
-
74
- # Create the Gradio interface
75
- iface = gr.Interface(
76
- fn=gradio_interface,
77
- inputs=gr.Textbox(label="Enter a story", lines=10, placeholder="Enter the story here..."),
78
- outputs=[gr.Plot(label="Similarity Distribution"), gr.Plot(label="Likes Distribution")],
79
- title="Story Similarity and Likes Analysis",
80
- description="Enter a new story to compare with the knowledge base. "
81
- "View similarity distributions and likes of stories with similarity > 0.8."
 
 
 
 
 
 
 
 
 
 
 
82
  )
83
 
84
  # Launch the interface
 
19
  text = re.sub(r'[^a-zA-Z\s]', '', text) # Remove special characters
20
  return text
21
 
22
+ def generate_graphs(new_story, metric):
 
23
  # Preprocess the new story
24
  new_story = preprocess_text(new_story)
25
 
 
27
  new_story_vector = model.encode([new_story])[0]
28
 
29
  # Calculate similarity with knowledge base stories
30
+ knowledge_base_vectors = encoded_df.iloc[:, :-7].values # Exclude metrics columns
31
  similarities = cosine_similarity([new_story_vector], knowledge_base_vectors)[0]
32
 
33
+ # Filter indices with similarity > 0.85
34
  similar_indexes = np.where(similarities > 0.85)[0]
35
+ print('Similar indexes:',similar_indexes)
36
 
37
  if len(similar_indexes) == 0:
38
+ return None,None
39
 
40
+ # Get metric distribution for stories with similarity > 0.85
41
+ metric_distribution = encoded_df.iloc[similar_indexes][metric].values
42
  story_labels = [f"Story {i+1}" for i in similar_indexes]
43
 
44
  # Plot similarity distribution for all similar stories
45
+ sim_dist_plot = plt.figure(figsize=(10, 6))
46
  sns.kdeplot(new_story_vector, shade=False, label="New Story", color='blue', linewidth=2)
47
  for idx in similar_indexes:
48
  most_similar_vector = encoded_df.iloc[idx, :-7].values
49
  sns.kdeplot(most_similar_vector, shade=False, label=f"Story {idx+1}", alpha=0.5)
50
+ plt.title(f"Similarity Distribution: New Story vs Similar Stories ({metric})", fontsize=14)
51
  plt.xlabel("Vector Values", fontsize=12)
52
  plt.ylabel("Density", fontsize=12)
53
  plt.legend(title="Stories")
54
+ plt.tight_layout()
55
+
56
+ # Create a bar graph for the metric distribution
57
+ metric_dist_plot = plt.figure(figsize=(10, 6))
58
+ sns.barplot(x=story_labels, y=metric_distribution, palette="viridis")
59
+ plt.title(f"{metric} Distribution for Similar Stories", fontsize=14)
60
+ plt.xlabel("Story Index (Similarity > 0.85)", fontsize=12)
61
+ plt.ylabel(metric, fontsize=12)
62
  plt.xticks(rotation=90)
63
+ plt.tight_layout()
64
+
65
+ return sim_dist_plot, metric_dist_plot
66
 
 
67
 
68
+ # Gradio interface for a specific metric
69
+ def gradio_interface(new_story, metric):
70
+ sim_dist_plot, metric_dist_plot = generate_graphs(new_story, metric)
71
  if sim_dist_plot is None:
72
+ return None, None
73
+ return sim_dist_plot, metric_dist_plot
74
+
75
+
76
+ # Create the Gradio interface with tabs
77
+ def create_tab(metric):
78
+ return gr.Interface(
79
+ fn=lambda new_story: gradio_interface(new_story, metric),
80
+ inputs=gr.Textbox(label="Enter a story", lines=10, placeholder="Enter the story here..."),
81
+ outputs=[gr.Plot(label="Similarity Distribution"), gr.Plot(label=f"{metric} Distribution")],
82
+ title=f"Story Similarity and {metric} Analysis",
83
+ description=f"Enter a new story to compare with the knowledge base. View similarity distributions and {metric} of stories with similarity > 0.85."
84
+ )
85
+
86
+ likes_tab = create_tab("likesCount")
87
+ comments_tab = create_tab("commentCount")
88
+ shares_tab = create_tab("shareCount")
89
+
90
+ # Combine tabs into a single app
91
+ iface = gr.TabbedInterface(
92
+ interface_list=[likes_tab, comments_tab, shares_tab],
93
+ tab_names=["Likes Analytics", "Comments Analytics", "Shares Analytics"]
94
  )
95
 
96
  # Launch the interface