from datasets import load_dataset
import streamlit as st


# NOTE(review): recent Streamlit releases deprecate `st.cache` in favour of
# `st.cache_data` -- confirm the pinned Streamlit version before switching.
@st.cache()
def load_all_repository_names():
    """Return the list of repository names used by the app.

    Currently this is a one-element placeholder list; the dataset-backed
    lookup is kept below as a comment so it can be restored later.
    """
    # TODO: restore the real data source:
    # load_dataset("bigcode/the-stack-paths", split="train")["repository_name"]
    return ["test"]

# --- Page header -----------------------------------------------------------
st.title("Am I in The Stack?")
st.markdown(
    "This tool lets you check if a repository under a given username is "
    "part of [The Stack dataset]"
    "(https://huggingface.co/datasets/bigcode/the-stack)."
)

# --- Data ------------------------------------------------------------------
# Repository names come from the cached loader; the count is kept in a
# module-level name alongside the list.
repo_names = load_all_repository_names()
n_repos = len(repo_names)

# --- User input ------------------------------------------------------------
username = st.text_input(label="GitHub Username:")

st.markdown("Note: this Space is currently under construction.")

"""
if st.button("Check!"):
    
    
    list_of_repos = []
    progress_bar = st.progress(0.0)
    
    for i in range(n_repos):
        progress_bar.progress((i + 1)/n_repos)
        if repo_names[i].split("/")[0]==username:
            list_of_repos.append(repo_names[i])
    
    if len(list_of_repos)==0:
        st.markdown("There is **no repository** under that username in The Stack.")
    else:
        if len(list_of_repos)==1:
            st.markdown("There is **1 repository** under that username in The Stack:")
        else:
            st.markdown(f"There are **{len(list_of_repos)} repositories** under that username in The Stack:")
        st.text("\n".join(list_of_repos))
        
    """