File size: 5,455 Bytes
2823b9e
 
 
 
 
 
 
 
bdd07f4
2823b9e
 
 
 
 
 
20f0f1d
9d8bb1b
 
 
2823b9e
 
 
2447e16
2823b9e
 
 
9d8bb1b
20f0f1d
9d8bb1b
20f0f1d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9d8bb1b
20f0f1d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2823b9e
 
 
 
 
 
 
 
 
 
 
f59f6b1
 
 
 
 
 
 
 
 
 
 
 
 
 
9981d93
2823b9e
8aa0325
f59f6b1
 
 
2823b9e
 
f59f6b1
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
from datetime import datetime, timedelta, timezone

import gradio as gr
import pandas as pd
from huggingface_hub import HfApi

# Module-level Hugging Face Hub client, created once with default arguments;
# get_recent_models() uses it to list models.
api = HfApi()

# Column order of the table produced by get_recent_models(); passing it to the
# DataFrame constructor keeps the headers present even when no model matches.
_RESULT_COLUMNS = ["Model ID", "Likes", "Creation Date", "Task"]


def _split_terms(raw):
    """Split a ``;``-separated string into lowercase, stripped, non-empty terms."""
    if not raw:
        return []
    stripped = (part.strip() for part in raw.lower().split(";"))
    # Drop empty terms: "" is a substring of every model ID, so a stray ";"
    # would otherwise match (search) or exclude (filter) every model.
    return [term for term in stripped if term]


def _as_utc(moment):
    """Return *moment* as an aware UTC datetime; naive values are taken as UTC."""
    if moment.tzinfo is None:
        return moment.replace(tzinfo=timezone.utc)
    return moment.astimezone(timezone.utc)


def _model_row(model):
    """Build one result-table row (a dict keyed by column name) for *model*."""
    model_id = model.modelId
    return {
        "Model ID": f'<a href="https://huggingface.co/{model_id}" target="_blank">{model_id}</a>',
        "Likes": model.likes,
        "Creation Date": model.created_at.strftime("%Y-%m-%d %H:%M"),
        # The attribute may exist but be None, so fall back on falsiness
        # rather than on attribute presence.
        "Task": getattr(model, "pipeline_tag", None) or "N/A",
    }


def get_recent_models(min_likes, days_ago, filter_string, search_string):
    """Return recently created Hub models that have at least *min_likes* likes.

    Args:
        min_likes: Minimum number of likes a model must have.
        days_ago: Only models created within this many days are kept.
        filter_string: ``;``-separated substrings; a model whose ID contains
            any of them (case-insensitive) is excluded. Empty means no filter.
        search_string: ``;``-separated substrings; when non-empty, a model is
            kept only if its ID contains at least one (case-insensitive).

    Returns:
        A pandas DataFrame with the columns "Model ID" (an HTML link to the
        model page), "Likes", "Creation Date" and "Task", in listing order.
    """
    cutoff = datetime.now(timezone.utc) - timedelta(days=days_ago)
    excluded_terms = _split_terms(filter_string)
    required_terms = _split_terms(search_string)

    rows = []
    for model in api.list_models(sort="likes", direction=-1):
        # The listing is requested sorted by likes in descending order, so the
        # first model below the threshold means no later one can qualify.
        if model.likes < min_likes:
            break

        created_at = getattr(model, "created_at", None)
        if not created_at or _as_utc(created_at) < cutoff:
            continue

        model_id = model.modelId.lower()
        if required_terms and not any(term in model_id for term in required_terms):
            continue
        if any(term in model_id for term in excluded_terms):
            continue

        rows.append(_model_row(model))

    return pd.DataFrame(rows, columns=_RESULT_COLUMNS)

# Define the Gradio interface: two sliders and two text boxes feed
# get_recent_models(), whose DataFrame result is rendered in a table.
with gr.Blocks() as demo:
    gr.Markdown("# Model Drops Tracker 🚀")
    gr.Markdown("Overwhelmed by the rapid pace of model releases? 😅 You're not alone! That's exactly why I built this tool. Easily filter recent models from the Hub by setting a minimum number of likes and the number of days since their release. Click on a model to see its card. Use `;` to split filter and search")
    with gr.Row():
        likes_slider = gr.Slider(minimum=1, maximum=100, step=1, value=5, label="Minimum Likes")
        days_slider = gr.Slider(minimum=1, maximum=30, step=1, value=3, label="Days Ago")
    with gr.Row():
        # "Filter" terms exclude matching model IDs; "Search" terms require a match.
        filter_text = gr.Text(label="Filter", max_lines=1)
        search_text = gr.Text(label="Search", max_lines=1)

    btn = gr.Button("Run")

    with gr.Column():
        df = gr.DataFrame(
            headers=["Model ID", "Likes", "Creation Date", "Task"],
            wrap=True,
            # One datatype per header; "html" lets the Model ID link render.
            datatype=["html", "number", "str", "str"],
        )

    # Input order must match the parameter order of get_recent_models().
    btn.click(fn=get_recent_models, inputs=[likes_slider, days_slider, filter_text, search_text], outputs=df)

# Launch the app only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    demo.launch()