"""Gradio app that plots and classifies GitHub developer commit activity.

The user supplies GitHub handles (typed, or uploaded as a file); the app looks
them up in a pre-computed CSV of monthly commit counts, plots commits per
month for each handle and labels each handle with an involvement status.
"""

from io import StringIO

import gradio as gr
import matplotlib.pyplot as plt
import pandas as pd
from termcolor import colored

# Location of the pre-computed monthly commit dataset.
DATASET_PATH = "data/source/all_networks_developer_classification.csv"
# Size of the "recent activity" window used by the classification.
RECENT_WINDOW_MONTHS = 3
# Fewer recent commits than this counts as "Low-level active".
LOW_ACTIVITY_COMMIT_THRESHOLD = 20


# Load the dataset with debug prints
def load_dataset():
    """Read the commit dataset and parse its ``month_year`` column.

    Returns:
        A DataFrame as read from ``DATASET_PATH`` with ``month_year``
        converted to datetimes.

    Raises:
        Exception: any error raised while reading or parsing is logged to
            stdout and re-raised unchanged.
    """
    try:
        print(colored("Loading dataset...", "blue"))
        df = pd.read_csv(DATASET_PATH)
        # Ensure the month_year column is in the correct datetime format
        # (e.g. "January_2024"). Adjust format if necessary.
        df['month_year'] = pd.to_datetime(df['month_year'], format='%B_%Y')
        return df
    except Exception as e:
        print(colored(f"Error loading dataset: {e}", "red"))
        raise


def _read_upload(uploaded_file):
    """Return the text content of an uploaded file.

    The File component is declared with ``type="binary"``, which hands the
    callback raw ``bytes``; file-like objects are still accepted so the
    helper keeps working if the component type is changed.
    """
    if isinstance(uploaded_file, (bytes, bytearray)):
        raw = bytes(uploaded_file)
    else:
        raw = uploaded_file.read()
    if isinstance(raw, str):
        # Text-mode file object: drop a BOM if one survived decoding.
        return raw.lstrip("\ufeff")
    # "utf-8-sig" strips the BOM that spreadsheet exports commonly prepend.
    return raw.decode("utf-8-sig")


def _parse_handles(text):
    """Split raw input into a list of unique, non-empty GitHub handles.

    Handles may be separated by commas and/or line breaks, so both the
    comma-separated textbox format and a single-column CSV are understood.
    First-seen order is preserved so plot and table follow the input order.
    """
    if not text:
        return []
    candidates = (
        part.strip()
        for line in text.splitlines()
        for part in line.split(",")
    )
    # dict.fromkeys de-duplicates while keeping insertion order.
    return list(dict.fromkeys(handle for handle in candidates if handle))


def _build_commit_plot(filtered_df, handles):
    """Plot total commits per month, one line per handle, and return the figure."""
    fig, ax = plt.subplots()
    try:
        for handle in handles:
            dev_df = filtered_df[filtered_df['developer'] == handle]
            dev_df = dev_df.sort_values('month_year')
            ax.plot(dev_df['month_year'], dev_df['total_commits'], label=handle)
        ax.set_xlabel("Month")
        ax.set_ylabel("Number of Commits")
        ax.legend()
        # Use figure/axes-level calls instead of pyplot's global "current
        # figure" so concurrent requests cannot style each other's plots.
        ax.tick_params(axis="x", labelrotation=45)
        fig.tight_layout()
    finally:
        # Drop the figure from pyplot's registry so a long-running server
        # does not accumulate open figures; the Figure object itself stays
        # valid and can still be rendered by the caller.
        plt.close(fig)
    return fig


def _classify_developers(filtered_df, handles):
    """Build the per-handle classification table.

    A handle with no rows at all is "Always been inactive"; with rows but
    none inside the recent window it is "Previously active but no longer";
    otherwise the recent commit total decides between "Low-level active"
    and "Highly involved".
    """
    # The cutoff is identical for every handle, so compute it once.
    cutoff = pd.Timestamp.now() - pd.DateOffset(months=RECENT_WINDOW_MONTHS)
    classification = []
    for handle in handles:
        dev_df = filtered_df[filtered_df['developer'] == handle]
        recent_activity = dev_df[dev_df['month_year'] >= cutoff]
        if dev_df.empty:
            status = "Always been inactive"
        elif recent_activity.empty:
            status = "Previously active but no longer"
        elif recent_activity['total_commits'].sum() < LOW_ACTIVITY_COMMIT_THRESHOLD:
            status = "Low-level active"
        else:
            status = "Highly involved"
        classification.append((handle, status))
    return pd.DataFrame(
        classification, columns=["Developer", "Classification"]
    ).sort_values("Classification", ascending=False)


# Process input and generate plot and classification with debug prints
def process_input(input_text, uploaded_file):
    """Turn user input into a commit plot and a classification table.

    Args:
        input_text: Handles typed into the textbox, separated by commas
            (line breaks are accepted as separators too).
        uploaded_file: Optional upload; when present it takes precedence
            over ``input_text``. May be raw ``bytes`` or a file-like object.

    Returns:
        A ``(figure, classification_df)`` tuple: the matplotlib figure of
        commits per month and a DataFrame with columns ``Developer`` and
        ``Classification``.

    Raises:
        ValueError: if no handle can be extracted from the input.
        Exception: any other failure is logged to stdout and re-raised.
    """
    try:
        print(colored("Processing input...", "blue"))

        # Read GitHub handles from input text or uploaded file
        if uploaded_file is not None:
            print(colored("Reading from uploaded file...", "blue"))
            input_text = _read_upload(uploaded_file)
        github_handles = _parse_handles(input_text)
        print(colored(f"GitHub handles: {github_handles}", "blue"))
        if not github_handles:
            # Fail early instead of plotting/classifying an empty-string handle.
            raise ValueError("No GitHub handles provided.")

        # Load dataset
        df = load_dataset()

        # Filter dataset for the provided GitHub handles
        print(colored("Filtering dataset...", "blue"))
        filtered_df = df[df['developer'].isin(github_handles)]

        # Generate plot
        print(colored("Generating plot...", "blue"))
        fig = _build_commit_plot(filtered_df, github_handles)

        # Generate classification table
        print(colored("Classifying developers...", "blue"))
        classification_df = _classify_developers(filtered_df, github_handles)
        print(colored("Classification completed.", "blue"))

        # Return plot and classification table
        return fig, classification_df
    except Exception as e:
        print(colored(f"Error processing input: {e}", "red"))
        raise


# Gradio interface with descriptions and debug prints
with gr.Blocks() as app:
    gr.Markdown("## GitHub Starknet Developer Insights")
    gr.Markdown(
        " This tool allows you to analyze the GitHub activity of developers"
        " within the Starknet ecosystem. Enter GitHub handles separated by"
        " commas or upload a CSV file with GitHub handles in a single column"
        " to see their monthly commit activity and their current involvement"
        " classification. "
    )
    with gr.Row():
        text_input = gr.Textbox(
            label="Enter GitHub handles separated by commas",
            placeholder="e.g., user1,user2,user3",
        )
        file_input = gr.File(
            label="Or upload a CSV file with GitHub handles in a single column",
            type="binary",
        )
    gr.Markdown(
        " *Note:* When uploading a CSV, ensure it contains a single column of"
        " GitHub handles without a header row. "
    )
    btn = gr.Button("Analyze")
    plot_output = gr.Plot(label="Commits per Month")
    table_output = gr.Dataframe(label="Developer Classification")
    btn.click(
        process_input,
        inputs=[text_input, file_input],
        outputs=[plot_output, table_output],
    )

print(colored("Gradio app initialized.", "blue"))

if __name__ == "__main__":
    print(colored("Launching app...", "blue"))
    app.launch(share=True)