Commit 
							
							·
						
						eea405a
	
0
								Parent(s):
							
							
initial commit
Browse files- .gitignore +1 -0
- README.md +11 -0
- app.py +115 -0
- poetry.lock +0 -0
- pyproject.toml +17 -0
- requirements.txt +54 -0
    	
        .gitignore
    ADDED
    
    | @@ -0,0 +1 @@ | |
|  | 
|  | |
| 1 | 
            +
            __pycache__/
         | 
    	
        README.md
    ADDED
    
    | @@ -0,0 +1,11 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            ---
         | 
| 2 | 
            +
            title: Repo Info
         | 
| 3 | 
            +
            emoji: 👁
         | 
| 4 | 
            +
            colorFrom: red
         | 
| 5 | 
            +
            colorTo: indigo
         | 
| 6 | 
            +
            sdk: gradio
         | 
| 7 | 
            +
            sdk_version: 5.5.0
         | 
| 8 | 
            +
            app_file: app.py
         | 
| 9 | 
            +
            pinned: false
         | 
| 10 | 
            +
            short_description: Get file and branch stats about any public repo
         | 
| 11 | 
            +
            ---
         | 
    	
        app.py
    ADDED
    
    | @@ -0,0 +1,115 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            # pylint: disable=no-member
         | 
| 2 | 
            +
            import gradio as gr
         | 
| 3 | 
            +
            import requests
         | 
| 4 | 
            +
            from huggingface_hub import HfApi
         | 
| 5 | 
            +
            import pandas as pd
         | 
| 6 | 
            +
            import plotly.express as px
         | 
| 7 | 
            +
             | 
| 8 | 
            +
            # Module-wide Hub client, constructed with default arguments and shared by
            # the helpers that query repository metadata and refs.
            HF_API = HfApi()
         | 
| 9 | 
            +
             | 
| 10 | 
            +
             | 
| 11 | 
            +
            def format_repo_size(r_size: int) -> str:
                """Render a byte count as a human-readable string with two decimals.

                The value is divided by 1024 until it falls below 1024 or the largest
                known unit (PB) is reached, so sizes beyond petabytes are still
                expressed in PB.

                Args:
                    r_size: Size in bytes.

                Returns:
                    The scaled value followed by its unit, e.g. ``"1.50 KB"``.
                """
                # Ordered from smallest to largest; the position is the power of 1024.
                units = ("B", "KB", "MB", "GB", "TB", "PB")
                scaled = r_size
                order = 0
                while scaled >= 1024 and order < len(units) - 1:
                    scaled /= 1024
                    order += 1
                return f"{scaled:.2f} {units[order]}"
         | 
| 18 | 
            +
             | 
| 19 | 
            +
             | 
| 20 | 
            +
            def repo_files(r_type: str, r_id: str) -> dict:
                """Aggregate a repository's files by extension.

                Args:
                    r_type: Repository type, forwarded to the Hub client as ``repo_type``.
                    r_id: Repository identifier, forwarded to the Hub client as ``repo_id``.

                Returns:
                    A dict mapping each extension to
                    ``{"size": total_bytes, "count": number_of_files}``. A file whose
                    name contains no dot is keyed by its file name.
                """
                r_info = HF_API.repo_info(repo_id=r_id, repo_type=r_type, files_metadata=True)
                files = {}
                # The Hub client types both ``siblings`` and ``size`` as optional, so
                # guard against None instead of failing on iteration / addition.
                for sibling in r_info.siblings or []:
                    # Derive the extension from the file name only: a dot inside a
                    # directory name (e.g. "v1.0/README") must not be mistaken for one.
                    basename = sibling.rfilename.rsplit("/", 1)[-1]
                    ext = basename.rsplit(".", 1)[-1]
                    stats = files.setdefault(ext, {"size": 0, "count": 0})
                    stats["size"] += sibling.size or 0
                    stats["count"] += 1
                return files
         | 
| 33 | 
            +
             | 
| 34 | 
            +
             | 
| 35 | 
            +
            def repo_size(r_type, r_id, *, timeout=1000):
                """Fetch the total tree size of every branch of a repository.

                Lookups are best-effort: a branch whose request fails, whose response is
                not valid JSON, or whose payload carries no ``"size"`` entry is left out
                of the result.

                Args:
                    r_type: Repository type (singular); it is pluralised to build the
                        API path and forwarded to the Hub client as ``repo_type``.
                    r_id: Repository identifier.
                    timeout: Per-request timeout handed to ``requests.get``, which
                        interprets it in seconds. The default keeps the original value.

                Returns:
                    A dict mapping branch name to the reported size.
                """
                from urllib.parse import quote

                r_refs = HF_API.list_repo_refs(repo_id=r_id, repo_type=r_type)
                repo_sizes = {}
                for branch in r_refs.branches:
                    # Branch names may contain "/" (e.g. "feature/x"); escape them so
                    # the name stays a single path segment of the URL.
                    ref = quote(branch.name, safe="")
                    url = f"https://huggingface.co/api/{r_type}s/{r_id}/treesize/{ref}"
                    try:
                        payload = requests.get(url, timeout=timeout).json()
                    except (requests.RequestException, ValueError):
                        # Network failure or non-JSON body: skip this branch.
                        continue

                    # Error responses are not guaranteed to be JSON objects.
                    size = payload.get("size") if isinstance(payload, dict) else None
                    if size is not None:
                        repo_sizes[branch.name] = size
                return repo_sizes
         | 
| 52 | 
            +
             | 
| 53 | 
            +
             | 
| 54 | 
            +
            def get_repo_info(r_type, r_id):
                """Collect file and branch statistics for a repository and build the UI updates.

                Args:
                    r_type: Repository type selected in the UI.
                    r_id: Repository identifier entered in the UI.

                Returns:
                    A 4-tuple of Gradio components, in the order expected by the click
                    handler's outputs: the results row (made visible), the per-extension
                    table, the per-extension pie chart, and the per-branch size table.
                """
                repo_sizes = repo_size(r_type, r_id)
                repo_files_info = repo_files(r_type, r_id)

                # Build the frames with explicit column names so that an empty listing
                # yields an empty, correctly shaped table instead of a KeyError.
                rf_sizes_df = pd.DataFrame(
                    [
                        {"Extension": ext, "bytes": stats["size"], "Count": stats["count"]}
                        for ext, stats in repo_files_info.items()
                    ],
                    columns=["Extension", "bytes", "Count"],
                ).sort_values(by="bytes", ascending=False)
                rf_sizes_df["Size"] = rf_sizes_df["bytes"].apply(format_repo_size)

                r_sizes_df = pd.DataFrame(
                    list(repo_sizes.items()),
                    columns=["Branch", "bytes"],
                )
                r_sizes_df["Size"] = r_sizes_df["bytes"].apply(format_repo_size)

                rf_sizes_plot = px.pie(
                    rf_sizes_df,
                    values="bytes",
                    names="Extension",
                    hover_data=["Size"],
                    title=f"File Distribution in {r_id}",
                    hole=0.3,
                )
                return (
                    gr.Row(visible=True),
                    gr.Dataframe(
                        value=rf_sizes_df[["Extension", "Count", "Size"]],
                        visible=True,
                    ),
                    gr.Plot(rf_sizes_plot, visible=True),
                    gr.Dataframe(value=r_sizes_df[["Branch", "Size"]], visible=True),
                )
         | 
| 84 | 
            +
             | 
| 85 | 
            +
             | 
| 86 | 
            +
            # UI layout: search inputs on top, results underneath. The results row and
            # its components start hidden and are swapped for visible ones by the
            # search handler.
            with gr.Blocks(theme="citrus") as demo:
                gr.Markdown("# Repository Information")
                gr.Markdown(
                    "Enter a repository ID and repository type and get back information about the repository's files and branches."
                )
                with gr.Blocks():
                    repo_id = gr.Textbox(label="Repository ID", placeholder="123456")
                    repo_type = gr.Radio(
                        choices=["model", "dataset", "space"],
                        label="Repository Type",
                        value="model",
                    )
                    search_button = gr.Button(value="Search")
                with gr.Blocks():
                    with gr.Row(visible=False) as results:
                        with gr.Column():
                            gr.Markdown("## File Information")
                            with gr.Row():
                                file_info = gr.Dataframe(visible=False)
                                file_info_plot = gr.Plot(visible=False)
                            gr.Markdown("## Branch Sizes")
                            branch_sizes = gr.Dataframe(visible=False)

                # Input order matches get_repo_info(r_type, r_id); output order matches
                # the tuple it returns.
                search_button.click(
                    get_repo_info,
                    inputs=[repo_type, repo_id],
                    outputs=[results, file_info, file_info_plot, branch_sizes],
                )

            # Start the server only when run as a script, so importing this module
            # (e.g. to reuse the helpers or the `demo` object) does not block on launch.
            if __name__ == "__main__":
                demo.launch()
         | 
    	
        poetry.lock
    ADDED
    
    | The diff for this file is too large to render. 
		See raw diff | 
|  | 
    	
        pyproject.toml
    ADDED
    
    | @@ -0,0 +1,17 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            [tool.poetry]
         | 
| 2 | 
            +
            name = "repo-info"
         | 
| 3 | 
            +
            version = "0.1.0"
         | 
| 4 | 
            +
            description = ""
         | 
| 5 | 
            +
            authors = ["jsulz <[email protected]>"]
         | 
| 6 | 
            +
            readme = "README.md"
         | 
| 7 | 
            +
             | 
| 8 | 
            +
            [tool.poetry.dependencies]
         | 
| 9 | 
            +
            python = "^3.12"
         | 
| 10 | 
            +
            gradio = "^5.5.0"
         | 
| 11 | 
            +
            huggingface-hub = "^0.26.2"
         | 
| 12 | 
            +
            plotly = "^5.24.1"
         | 
| 13 | 
            +
             | 
| 14 | 
            +
             | 
| 15 | 
            +
            [build-system]
         | 
| 16 | 
            +
            requires = ["poetry-core"]
         | 
| 17 | 
            +
            build-backend = "poetry.core.masonry.api"
         | 
    	
        requirements.txt
    ADDED
    
    | @@ -0,0 +1,54 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            aiofiles==23.2.1 
         | 
| 2 | 
            +
            annotated-types==0.7.0 
         | 
| 3 | 
            +
            anyio==4.6.2.post1 
         | 
| 4 | 
            +
            certifi==2024.8.30 
         | 
| 5 | 
            +
            charset-normalizer==3.4.0 
         | 
| 6 | 
            +
            click==8.1.7  
         | 
| 7 | 
            +
            colorama==0.4.6
         | 
| 8 | 
            +
            fastapi==0.115.5 
         | 
| 9 | 
            +
            ffmpy==0.4.0 
         | 
| 10 | 
            +
            filelock==3.16.1 
         | 
| 11 | 
            +
            fsspec==2024.10.0 
         | 
| 12 | 
            +
            gradio-client==1.4.2 
         | 
| 13 | 
            +
            gradio==5.5.0 
         | 
| 14 | 
            +
            h11==0.14.0 
         | 
| 15 | 
            +
            httpcore==1.0.6 
         | 
| 16 | 
            +
            httpx==0.27.2 
         | 
| 17 | 
            +
            huggingface-hub==0.26.2 
         | 
| 18 | 
            +
            idna==3.10 
         | 
| 19 | 
            +
            jinja2==3.1.4 
         | 
| 20 | 
            +
            markdown-it-py==3.0.0  
         | 
| 21 | 
            +
            markupsafe==2.1.5 
         | 
| 22 | 
            +
            mdurl==0.1.2  
         | 
| 23 | 
            +
            numpy==2.1.3 
         | 
| 24 | 
            +
            orjson==3.10.11 
         | 
| 25 | 
            +
            packaging==24.2 
         | 
| 26 | 
            +
            pandas==2.2.3 
         | 
| 27 | 
            +
            pillow==11.0.0 
         | 
| 28 | 
            +
            plotly==5.24.1 
         | 
| 29 | 
            +
            pydantic-core==2.23.4 
         | 
| 30 | 
            +
            pydantic==2.9.2 
         | 
| 31 | 
            +
            pydub==0.25.1 
         | 
| 32 | 
            +
            pygments==2.18.0  
         | 
| 33 | 
            +
            python-dateutil==2.9.0.post0 
         | 
| 34 | 
            +
            python-multipart==0.0.12 
         | 
| 35 | 
            +
            pytz==2024.2 
         | 
| 36 | 
            +
            pyyaml==6.0.2 
         | 
| 37 | 
            +
            requests==2.32.3 
         | 
| 38 | 
            +
            rich==13.9.4  
         | 
| 39 | 
            +
            ruff==0.7.3  
         | 
| 40 | 
            +
            safehttpx==0.1.1 
         | 
| 41 | 
            +
            semantic-version==2.10.0 
         | 
| 42 | 
            +
            shellingham==1.5.4  
         | 
| 43 | 
            +
            six==1.16.0 
         | 
| 44 | 
            +
            sniffio==1.3.1 
         | 
| 45 | 
            +
            starlette==0.41.2 
         | 
| 46 | 
            +
            tenacity==9.0.0 
         | 
| 47 | 
            +
            tomlkit==0.12.0 
         | 
| 48 | 
            +
            tqdm==4.67.0 
         | 
| 49 | 
            +
            typer==0.13.0  
         | 
| 50 | 
            +
            typing-extensions==4.12.2 
         | 
| 51 | 
            +
            tzdata==2024.2 
         | 
| 52 | 
            +
            urllib3==2.2.3 
         | 
| 53 | 
            +
            uvicorn==0.32.0  
         | 
| 54 | 
            +
            websockets==12.0 
         | 
