Spaces:

Sa-m
/

manifesto-explainer

Running

App Files Files Community

Sa-m committed on Jul 28

Commit

140eb89

verified ·

1 Parent(s): 756209b

Update app.py

Browse files

Files changed (1) hide show

app.py +67 -37

app.py CHANGED Viewed

@@ -288,77 +288,107 @@ def fDistancePlot(text2Party):
     return safe_plot(plot_func)
 def DispersionPlot(textParty):
     """Build a dispersion plot image for the most frequent words of a text.

     The text is tokenized, the 15 most frequent tokens are reduced to
     alphabetic tokens longer than two characters, and at most five of them
     (those also found in the ``Text`` object built from the tokens) are
     handed to ``Text.dispersion_plot``.

     Args:
         textParty: The text to analyse.

     Returns:
         The object returned by ``Image.open`` on the in-memory PNG, or
         ``None`` when tokenization yields nothing or any step raises. When
         no word survives the filtering, the image is a placeholder that
         carries an explanatory message.
     """
     png_buffer = None
     try:
         tokens = word_tokenize(textParty)
         print(f"Debug DispersionPlot: Total tokens: {len(tokens)}")
         if not tokens:
             print("Warning: No tokens found for dispersion plot.")
             return None
         corpus = Text(tokens)
         frequencies = FreqDist(tokens)
         print(f"Debug DispersionPlot: FreqDist sample: {list(frequencies.most_common(10))}")
         # Keep only informative candidates: longer than two characters,
         # purely alphabetic, and not a digit string.
         candidates = []
         for token, count in frequencies.most_common(15):
             if len(token) <= 2 or not token.isalpha() or token.isdigit():
                 continue
             candidates.append((token, count))
         print(f"Debug DispersionPlot: Filtered common words: {candidates}")
         # A slice copes with fewer than five candidates without a length check.
         top_words = [token for token, _ in candidates[:5]]
         # Only plot words that the Text object actually contains.
         plot_words = [token for token in top_words if token in corpus]
         print(f"Debug DispersionPlot: Final word list for plot: {plot_words}")
         if plot_words:
             # The figure is created up front and no axes are passed on:
             # dispersion_plot is called with the word list only.
             fig = plt.figure(figsize=(10, 5))
             plt.title('Dispersion Plot')
             corpus.dispersion_plot(plot_words)
             plt.tight_layout()
         else:
             print("Warning: No suitable words found for dispersion plot after filtering and checking membership.")
             # Placeholder figure that only shows a message, axes hidden.
             fig, ax = plt.subplots(figsize=(8, 3))
             ax.text(0.5, 0.5, "No suitable words found for dispersion plot", ha='center', va='center', transform=ax.transAxes)
             ax.set_xlim(0, 1)
             ax.set_ylim(0, 1)
             ax.axis('off')
             fig.suptitle('Dispersion Plot')
         png_buffer = BytesIO()
         try:
             fig.savefig(png_buffer, format='png', bbox_inches='tight')
         except AttributeError as ae:
             # Only the "apply_aspect" failure is recoverable; anything else
             # propagates to the outer handler.
             if "apply_aspect" not in str(ae):
                 raise
             print(f"Warning: bbox_inches='tight' failed for Dispersion Plot ({ae}), saving without it.")
             png_buffer.seek(0)
             png_buffer = BytesIO()
             fig.savefig(png_buffer, format='png')
         png_buffer.seek(0)
         img = Image.open(png_buffer)
         plt.close(fig)
         return img
     except Exception as e:
         print(f"Dispersion plot error: {e}")
         if png_buffer:
             png_buffer.close()
         traceback.print_exc()
         # Close every open figure so a failed run leaves none behind.
         plt.close('all')
         return None
 def word_cloud_generator(parsed_text_name, text_Party):
     """Generates the word cloud image."""

     return safe_plot(plot_func)
 def DispersionPlot(textParty):
     """Draw a dispersion plot for the most frequent words of a text.

     The text is lower-cased and tokenized, the 15 most frequent tokens are
     reduced to alphabetic tokens longer than two characters, and the first
     five survivors are plotted: one row per word and one marker per
     occurrence, with the token index on the x-axis.

     Args:
         textParty: The text to analyse.

     Returns:
         The object returned by ``Image.open`` on the in-memory PNG, or
         ``None`` when tokenization yields nothing or any step raises. When
         no word survives the filtering, the image is a placeholder that
         carries an explanatory message.
     """
     buf = None
     try:
         # Lower-casing first makes the frequency count and the position
         # lookup below treat differently-cased spellings as one word.
         word_tokens_party = word_tokenize(textParty.lower())
         print(f"Debug DispersionPlot: Total tokens: {len(word_tokens_party)}")
         if not word_tokens_party:
             print("Warning: No tokens found for dispersion plot.")
             return None
         fdistance = FreqDist(word_tokens_party)
         print(f"Debug DispersionPlot: FreqDist sample: {list(fdistance.most_common(10))}")
         # isalpha() already rejects digit-only tokens, so no separate
         # isdigit() test is needed.
         common_words_filtered = [
             (word, freq) for word, freq in fdistance.most_common(15)
             if len(word) > 2 and word.isalpha()
         ]
         print(f"Debug DispersionPlot: Filtered common words: {common_words_filtered}")
         # A slice copes with fewer than five candidates.
         final_word_list = [word for word, _ in common_words_filtered[:5]]
         print(f"Debug DispersionPlot: Final word list for plot: {final_word_list}")
         if not final_word_list:
             print("Warning: No suitable words found for dispersion plot.")
             # Placeholder figure that only shows a message, axes hidden.
             fig, ax = plt.subplots(figsize=(8, 3))
             ax.text(0.5, 0.5, "No suitable words found for dispersion plot", ha='center', va='center', transform=ax.transAxes)
             ax.set_xlim(0, 1)
             ax.set_ylim(0, 1)
             ax.axis('off')
             fig.suptitle('Dispersion Plot')
         else:
             fig, ax = plt.subplots(figsize=(12, 6))
             # Collect the positions of all target words in one pass over the
             # tokens instead of rescanning the text once per word.
             offsets_by_word = {word: [] for word in final_word_list}
             for position, token in enumerate(word_tokens_party):
                 if token in offsets_by_word:
                     offsets_by_word[token].append(position)
             # pyplot.get_cmap replaces matplotlib.cm.get_cmap, which was
             # deprecated in Matplotlib 3.7 and removed in 3.9; with the old
             # call the outer handler swallowed the resulting AttributeError
             # and the function returned None.
             colors = plt.get_cmap('tab10', len(final_word_list))
             # Rows are numbered from 1 so they line up with the y-ticks below.
             for row, word in enumerate(final_word_list, start=1):
                 offsets = offsets_by_word[word]
                 ax.scatter(offsets, [row] * len(offsets), color=colors(row - 1), alpha=0.7, s=30)
             ax.set_xlabel("Position in Text (Token Index)")
             ax.set_ylabel("Words")
             ax.set_title("Dispersion Plot")
             # The tick labels name each row, so no legend is drawn.
             ax.set_yticks(range(1, len(final_word_list) + 1))
             ax.set_yticklabels(final_word_list)
             # Inverted so the most frequent word sits on the top row.
             ax.invert_yaxis()
             ax.grid(True, axis='x', linestyle='--', alpha=0.5)
             fig.tight_layout()
         buf = BytesIO()
         try:
             fig.savefig(buf, format='png', bbox_inches='tight', dpi=150)
         except AttributeError as ae:
             # Only the "apply_aspect" failure is recoverable; anything else
             # propagates to the outer handler.
             if "apply_aspect" not in str(ae):
                 raise
             print(f"Warning: bbox_inches='tight' failed for Dispersion Plot ({ae}), saving without it.")
             # Drop the possibly half-written buffer and retry without the
             # tight bounding box.
             buf.close()
             buf = BytesIO()
             fig.savefig(buf, format='png', dpi=150)
         buf.seek(0)
         # buf is deliberately left open on success: it is handed to
         # Image.open and the resulting image is returned to the caller.
         img = Image.open(buf)
         plt.close(fig)
         return img
     except Exception as e:
         print(f"Dispersion plot error: {e}")
         if buf is not None:
             buf.close()
         traceback.print_exc()
         # Close every open figure so a failed run leaves none behind.
         plt.close('all')
         return None
 def word_cloud_generator(parsed_text_name, text_Party):
     """Generates the word cloud image."""