import streamlit as st
import pandas as pd
from github import Github
from wordcloud import WordCloud
import matplotlib.pyplot as plt
import re

# Build the GitHub client from the token kept in Streamlit's secrets, then
# resolve the repository identified by the REPO_NAME secret. ``repo`` is the
# handle the rest of the script reads from.
access_token = st.secrets["ACCESS_TOKEN"]
g = Github(access_token)
repo_name = st.secrets["REPO_NAME"]
repo = g.get_repo(repo_name)

def fetch_data():
    """Download the repository's issues and return them as a DataFrame.

    Uses the module-level ``repo`` object and asks it for issues in every
    state (``state="all"``). Each returned item becomes one row with the
    columns ``Issue`` ("<number> - <title>"), ``State``, ``Created at``,
    ``Closed at``, ``Last update``, ``Labels`` (list of label names),
    ``Reactions`` (total reaction count), ``Comments`` and ``URL``.

    NOTE(review): GitHub's issues listing may also include pull requests;
    confirm whether those should be filtered out here.

    Returns:
        pandas.DataFrame: one row per item yielded by ``repo.get_issues``.
    """

    def _as_row(issue):
        # Flatten the fields the dashboard needs into a plain dict.
        label_names = [label.name for label in issue.labels]
        return {
            'Issue': f"{issue.number} - {issue.title}",
            'State': issue.state,
            'Created at': issue.created_at,
            'Closed at': issue.closed_at,
            'Last update': issue.updated_at,
            'Labels': label_names,
            'Reactions': issue.reactions['total_count'],
            'Comments': issue.comments,
            'URL': issue.html_url,
        }

    rows = [_as_row(issue) for issue in repo.get_issues(state="all")]
    return pd.DataFrame(rows)

def save_data(df, path="issues.json"):
    """Persist *df* to disk as an indented JSON array of records.

    Args:
        df: DataFrame to write.
        path: Destination file. Defaults to ``"issues.json"``, the cache file
            the dashboard reads on start-up.
    """
    # ``orient="records"`` never writes the index, so no ``index`` argument is
    # required. Passing ``index=False`` explicitly is rejected with a
    # ValueError by older pandas releases, which only accept it for the
    # "split" and "table" orients.
    df.to_json(path, orient="records", indent=4)


st.title(f"GitHub Issues Dashboard for {repo.name}")
status = st.status(label="Loading data...", state="running")

# Prefer the local JSON cache so the page renders without waiting for the
# GitHub API; fall back to a live fetch (and create the cache) when the file
# cannot be used.
try:
    df = pd.read_json(
        "issues.json",
        convert_dates=["Created at", "Closed at", "Last update"],
        date_unit="ms",
    )
except (OSError, ValueError):
    # OSError: the cache file is missing or unreadable.
    # ValueError: the file exists but does not contain parseable JSON.
    # Anything else (e.g. KeyboardInterrupt) is deliberately not swallowed.
    df = fetch_data()
    save_data(df)

# Section 1: Issue activity metrics
st.header("Issue activity metrics")

col1, col2, col3 = st.columns(3)

state_counts = df['State'].value_counts()
open_issues = df.loc[df['State'] == 'open']
# Work on an explicit copy: a column is added below, and assigning to a
# filtered selection of ``df`` can trigger pandas' SettingWithCopyWarning.
closed_issues = df.loc[df['State'] == 'closed'].copy()
closed_issues['Time to Close'] = closed_issues['Closed at'] - closed_issues['Created at']

with col1:
    # ``.get`` avoids a KeyError when the repository has no open issues.
    st.metric(label="Open issues", value=int(state_counts.get('open', 0)))

with col2:
    # Same guard for a repository without any closed issues.
    st.metric(label="Closed issues", value=int(state_counts.get('closed', 0)))

with col3:
    mean_time_to_close = closed_issues['Time to Close'].mean()
    # With no closed issues the mean is missing (NaT/NaN); pass None so the
    # metric shows a placeholder instead of "nan".
    if pd.isna(mean_time_to_close):
        average_time_to_close = None
    else:
        average_time_to_close = mean_time_to_close.days
    st.metric(label="Avg. days to close", value=average_time_to_close)


# TODO Plot: number of open vs closed issues by date

st.subheader("Latest bugs 🐞")


def _has_bug_label(labels):
    """Return True when the issue's label list contains "type: bug"."""
    return "type: bug" in labels


# Open issues labelled as bugs, reduced to the columns shown in the table.
bug_mask = open_issues["Labels"].apply(_has_bug_label)
bug_issues = open_issues[bug_mask][["Issue", "Labels", "Created at", "URL"]]

# Most recently created bugs first.
bugs_newest_first = bug_issues.sort_values(by="Created at", ascending=False)
bug_columns = {
    "Issue": st.column_config.TextColumn("Issue", width=400),
    "Labels": st.column_config.TextColumn("Labels"),
    "Created at": st.column_config.DatetimeColumn("Created at"),
    "URL": st.column_config.LinkColumn("🔗", display_text="🔗"),
}
st.dataframe(bugs_newest_first, hide_index=True, column_config=bug_columns)

st.subheader("Latest updates 📝")

# Open issues ordered by their last update, most recent first.
recently_updated = open_issues[["Issue", "Last update", "URL"]]
recently_updated = recently_updated.sort_values(by="Last update", ascending=False)
update_columns = {
    "Issue": st.column_config.TextColumn("Issue", width=400),
    "Last update": st.column_config.DatetimeColumn("Last update"),
    "URL": st.column_config.LinkColumn("🔗", display_text="🔗"),
}
st.dataframe(recently_updated, hide_index=True, column_config=update_columns)

# Section 2: Issue classification
st.header("Issue classification")
col1, col2 = st.columns(2)

## Dataframe: Number of open issues by label.
with col1:
    st.subheader("Top ten labels 🔖")
    # One row per (issue, label) pair so each label can be counted.
    open_issues_exploded = open_issues.explode("Labels")
    label_counts = open_issues_exploded.value_counts("Labels").to_frame()

    def generate_labels_link(labels):
        """Return, for each label name, a URL listing its open issues.

        The links point at the repository this dashboard is configured for
        (``repo.html_url``) rather than a fixed repository, and label names
        are URL-encoded so characters such as ``&`` or ``#`` cannot break
        the query string.
        """
        from urllib.parse import quote_plus

        issues_url = f"{repo.html_url}/issues"
        return [
            f"{issues_url}?q=is:open+is:issue+label:%22{quote_plus(label)}%22"
            for label in labels
        ]

    label_counts['Link'] = generate_labels_link(label_counts.index)

    st.dataframe(
        label_counts.head(10),
        column_config={
            "Labels": st.column_config.TextColumn("Labels"),
            "count": st.column_config.NumberColumn("Count"),
            "Link": st.column_config.LinkColumn("Link", display_text="🔗")
        }
    )
    
## Cloud of words: Issue titles
with col2:
    st.subheader("Cloud of words ☁️")
    titles = " ".join(open_issues["Issue"])
    # Drop bracketed tags such as "[BUG]" so they do not dominate the cloud.
    titles = re.sub(r'\[.*?\]', '', titles)
    try:
        wordcloud = WordCloud(width=800, height=400, background_color="white").generate(titles)
    except ValueError:
        # WordCloud raises ValueError when the text yields no words to plot,
        # e.g. when there are no open issues.
        st.info("Not enough words in the open issue titles to build a word cloud.")
    else:
        # Draw on an explicit figure instead of pyplot's global one, and close
        # it after rendering so figures do not pile up across script reruns.
        fig, ax = plt.subplots(figsize=(10, 5))
        ax.imshow(wordcloud, interpolation="bilinear")
        ax.axis("off")
        st.pyplot(fig)
        plt.close(fig)

# Section 3: Community engagement
st.header("Community engagement")
# Planned: dataframe of the latest issues opened by the community.
# Planned: dataframe of issues sorted by number of comments.
st.subheader("Top engaging issues 💬")

# Rank open issues by reactions, breaking ties by comment count, and keep
# the ten highest.
engagement_columns = ["Issue", "Reactions", "Comments", "URL"]
ranked_by_engagement = open_issues[engagement_columns].sort_values(
    by=["Reactions", "Comments"],
    ascending=False,
)
engagement_df = ranked_by_engagement.head(10)

engagement_column_config = {
    "Issue": st.column_config.TextColumn("Issue", width=400),
    "Reactions": st.column_config.NumberColumn("Reactions", format="%d 👍"),
    "Comments": st.column_config.NumberColumn("Comments", format="%d 💬"),
    "URL": st.column_config.LinkColumn("🔗", display_text="🔗"),
}
st.dataframe(
    engagement_df,
    hide_index=True,
    use_container_width=True,
    column_config=engagement_column_config,
)

# ## Cloud of words: Comments??
# ## Dataframe: Contributor leaderboard.

# # Issue dependencies
# st.header("Issue dependencies")
# ## Map: dependencies between issues. Network of issue mentions.

# Re-fetch after the page has rendered and refresh the cache file when its
# contents would change, so the next page load shows current data.
status.update(label="Checking for updated data...", state="running")
updated_data = fetch_data()

# Compare the serialised form (what save_data would write) instead of using
# DataFrame.equals: ``equals`` also requires matching dtypes, so a frame that
# was reloaded from the JSON cache can compare unequal to a freshly fetched
# one even when saving it would produce the same records.
cache_is_current = updated_data.to_json(orient="records") == df.to_json(orient="records")

if cache_is_current:
    status.update(label="Data is up to date!", state="complete")
else:
    save_data(updated_data)
    status.update(label="Refresh for updated data!", state="complete")