File size: 2,535 Bytes
b7bbd70
6c65400
 
86f4186
6c65400
 
 
 
 
 
 
 
 
 
 
 
 
 
86f4186
6c65400
 
 
 
 
 
 
 
86f4186
6c65400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2f3bed4
6c65400
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import os
import gradio as gr
from repo_utils import extract_repo_content

def format_output(extracted_content, repo_url):
    """Render extracted repository files as a Markdown-style text report.

    Args:
        extracted_content: Iterable of per-file records. A well-formed record
            is a dict with a ``'header'`` mapping (keys ``name``, ``type``,
            ``size``, ``creation_date``, ``modification_date``) and a
            ``'content'`` entry.
        repo_url: Repository URL, echoed on the first line of the report.

    Returns:
        The report as a single string: a URL heading followed by one section
        per record. Any record that is not a dict, lacks ``'header'``, or is
        missing one of the required fields is replaced by the line
        ``"Error in file data format."`` instead of aborting the whole report.
    """
    # Collect pieces and join once rather than growing a string with `+=`.
    parts = [f"# Repository URL: {repo_url}\n\n"]
    for file_data in extracted_content:
        if not (isinstance(file_data, dict) and 'header' in file_data):
            parts.append("Error in file data format.\n")
            continue
        try:
            header = file_data['header']
            # Build the whole section before appending so a record that is
            # missing a field never leaves a half-written section behind.
            section = (
                f"### File: {header['name']}\n"
                f"**Type:** {header['type']}\n"
                f"**Size:** {header['size']} bytes\n"
                f"**Created:** {header['creation_date']}\n"
                f"**Modified:** {header['modification_date']}\n"
                "#### Content:\n"
                f"```\n{file_data['content']}\n```\n\n"
            )
        except (KeyError, TypeError):
            # Missing field, or a header that is not subscriptable by key.
            section = "Error in file data format.\n"
        parts.append(section)
    return "".join(parts)

def extract_and_display(url):
    """Extract the repository at ``url`` and return it as display-ready text.

    Credentials come from the ``HF_TOKEN`` and ``SPACE_AUTHOR_NAME``
    environment variables. If either is unset or empty, an error message is
    returned and no extraction is attempted.

    Args:
        url: Repository URL, passed through to ``extract_repo_content``.

    Returns:
        The formatted report produced by ``format_output``, or the error
        message string when credentials are missing.
    """
    token = os.getenv("HF_TOKEN")
    author = os.getenv("SPACE_AUTHOR_NAME")

    # Guard clause: both credentials are required before contacting anything.
    if not (token and author):
        return "Error: HF_TOKEN or SPACE_AUTHOR_NAME environment variable is not set."

    return format_output(extract_repo_content(url, token, author), url)

# Build the Gradio interface: a URL textbox with clickable examples, an
# output textbox, and a button that runs the extraction.
with gr.Blocks(theme="sudeepshouche/minimalist") as app:
    gr.Markdown("# Hugging Face Space / Model Repository Content Extractor")

    url_input = gr.Textbox(
        label="https:// URL of Repository",
        placeholder="Enter the repository URL here OR select an example below...",
    )
    # Example URLs that fill `url_input` when selected.
    url_examples = gr.Examples(
        examples=[
            ["https://huggingface.co/spaces/big-vision/paligemma-hf"],
            ["https://huggingface.co/google/paligemma-3b-mix-224"],
            ["https://huggingface.co/microsoft/Phi-3-vision-128k-instruct"],
            ["https://huggingface.co/llava-hf/llava-v1.6-mistral-7b-hf"],
        ],
        inputs=url_input,
    )
    output_display = gr.Textbox(
        label="Extracted Repository Content",
        show_copy_button=True,
        lines=20,
        placeholder=(
            "Repository content will be extracted here...\n\n"
            "Metadata is captured for all files, but text content provided only for files less than 32 kb\n\n\n\n"
            "Review and search through the content here OR simply copy it for offline analysis!!. 🤖"
        ),
    )
    extract_button = gr.Button("Extract Content")

    # Clicking the button sends the URL to `extract_and_display` and shows
    # its return value in the output textbox.
    extract_button.click(
        fn=extract_and_display,
        inputs=url_input,
        outputs=output_display,
    )

# Start the server only when executed as a script, not when imported.
if __name__ == "__main__":
    app.launch()