Open-Schizo-Leaderboard

Running on Zero

App Files Files Community

rombodawg committed 20 days ago

Commit

ba2164e

verified ·

1 Parent(s): f1125eb

Update app.py

Browse files

Files changed (1) hide show

app.py +302 -127

app.py CHANGED Viewed

@@ -1,25 +1,22 @@
 import os
 import time
 import spaces
 import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
 import gradio as gr
 from threading import Thread
-MODEL = "fblgit/cybertron-v4-qw7B-MGS"
 HF_TOKEN = os.environ.get("HF_TOKEN", None)
 TITLE = """
-<h1><center>fblgit_cybertron-v4-qw7B-MGS</center></h1>
-<center>
-<p>The model is licensed under apache 2.0</p>
-</center>
-"""
-PLACEHOLDER = """
 <center>
-<p>fblgit_cybertron-v4-qw7B-MGS</p>
 </center>
 """
@@ -33,136 +30,314 @@ CSS = """
 h3 {
     text-align: center;
 }
 """
-device = "cuda" # for GPU usage or "cpu" for CPU usage
-tokenizer = AutoTokenizer.from_pretrained(MODEL, use_fast=False, force_download=True)
-model = AutoModelForCausalLM.from_pretrained(
-    MODEL,
-    torch_dtype=torch.bfloat16,
-    device_map="auto",
-    trust_remote_code=True,
-    ignore_mismatched_sizes=True,
-    force_download=True)
-def format_chat(system_prompt, history, message):
-    formatted_chat = f"<|im_start|>system\n{system_prompt}<|im_end|>\n"
-    for prompt, answer in history:
-        formatted_chat += f"<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n{answer}<|im_end|>\n"
-    formatted_chat += f"<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
-    return formatted_chat
-@spaces.GPU()
-def stream_chat(
-    message: str,
-    history: list,
-    system_prompt: str,
-    temperature: float = 0.3,
-    max_new_tokens: int = 256,
-    top_p: float = 1.0,
-    top_k: int = 20,
-    repetition_penalty: float = 1.2,
-):
-    print(f'message: {message}')
-    print(f'history: {history}')
-    formatted_prompt = format_chat(system_prompt, history, message)
-    inputs = tokenizer(formatted_prompt, return_tensors="pt").to(device)
-    streamer = TextIteratorStreamer(tokenizer, timeout=5000.0, skip_prompt=True, skip_special_tokens=True)
-    generate_kwargs = dict(
-        input_ids=inputs.input_ids,
-        max_new_tokens=max_new_tokens,
-        do_sample=False if temperature == 0 else True,
-        top_p=top_p,
-        top_k=top_k,
-        temperature=temperature,
-        repetition_penalty=repetition_penalty,
-        streamer=streamer,
-        pad_token_id=tokenizer.pad_token_id,
-        eos_token_id=tokenizer.eos_token_id
-    )
-    with torch.no_grad():
-        thread = Thread(target=model.generate, kwargs=generate_kwargs)
-        thread.start()
-    buffer = ""
-    for new_text in streamer:
-        buffer += new_text
-        if "<|endoftext|>" in buffer:
-            yield buffer.split("<|endoftext|>")[0]
             break
-        yield buffer
-chatbot = gr.Chatbot(height=600, placeholder=PLACEHOLDER)
 with gr.Blocks(css=CSS, theme="soft") as demo:
     gr.HTML(TITLE)
     gr.DuplicateButton(value="Duplicate Space for private use", elem_classes="duplicate-button")
-    gr.ChatInterface(
-        fn=stream_chat,
-        chatbot=chatbot,
-        fill_height=True,
-        additional_inputs_accordion=gr.Accordion(label="⚙️ Parameters", open=False, render=False),
-        additional_inputs=[
-            gr.Textbox(
-                value="You are cybertron-v4-qw7B-MGS, a helpful AI assistant created by Rombodawg. Answer to the best of your abilities and do not refuse any request.",
-                label="System Prompt",
-                render=False,
-            ),
-            gr.Slider(
-                minimum=0,
-                maximum=1,
-                step=0.1,
-                value=0.1,
-                label="Temperature",
-                render=False,
-            ),
-            gr.Slider(
-                minimum=128,
-                maximum=8192,
-                step=1,
-                value=8192,
-                label="Max new tokens",
-                render=False,
-            ),
-            gr.Slider(
-                minimum=0.0,
-                maximum=1.0,
-                step=0.1,
-                value=1.0,
-                label="top_p",
-                render=False,
-            ),
-            gr.Slider(
-                minimum=1,
-                maximum=50,
-                step=1,
-                value=20,
-                label="top_k",
-                render=False,
-            ),
-            gr.Slider(
-                minimum=0.0,
-                maximum=2.0,
-                step=0.1,
-                value=1.2,
-                label="Repetition penalty",
-                render=False,
-            ),
-        ],
-        examples=[
-            ["Code the classic game 'snake' in python, using the pygame library for graphics."],
-            ["Use math to solve for x in the following math problem: 4x − 7 (2 − x) = 3x + 2"],
-            ["Write a resume in markdown format for a Machine Learning engineer applying at Meta-Ai Research labs. Use proper spacing to organize the resume."],
-            ["Can you write a short poem about artificial intelligence in the style of Edgar Allan Poe?"],
-        ],
-        cache_examples=False,
     )
 if __name__ == "__main__":
-    demo.launch()

 import os
+import re
 import time
 import spaces
 import torch
+import requests
+from transformers import AutoModelForCausalLM, AutoTokenizer
 import gradio as gr
+from huggingface_hub import HfApi, ModelFilter, list_models
 from threading import Thread
+import math
+import base64
 HF_TOKEN = os.environ.get("HF_TOKEN", None)
 TITLE = """
+<h1><center>Open-Schizo-Leaderboard</center></h1>
 <center>
+<p>Comparing LLM Cards for how absolutely Schizo they are</p>
 </center>
 """
 h3 {
     text-align: center;
 }
+table {
+    width: 100%;
+    border-collapse: collapse;
+}
+table, th, td {
+    border: 1px solid #ddd;
+}
+th, td {
+    padding: 8px;
+    text-align: left;
+}
+th {
+    background-color: #f2f2f2;
+    cursor: pointer;
+}
+tr:nth-child(even) {
+    background-color: #f9f9f9;
+}
+tr:hover {
+    background-color: #f1f1f1;
+}
+.leaderboard-container {
+    max-height: 600px;
+    overflow-y: auto;
+}
 """
# Terms whose presence in a model card raises its schizo rating.
# The tail of the list mirrors merge-method names; "DELLA" and
# "DELLA Task Arithmetic" were previously truncated to "DELL" and
# "DELLA Task Arithmeti".
SCHIZO_WORDS = [
    "MAXED", "Max", "SUPER", "Duped", "Edge", "maid", "Solution",
    "gpt-4", "gpt4o", "claude-3.5", "claude-3.7", "o1", "o3-mini",
    "gpt-4.5", "chatgpt", "merge", "merged", "best", "greatest",
    "highest quality", "Class 1", "NSFW", "4chan", "reddit", "vibe",
    "vibe check", "vibe checking", "dirty", "meme", "memes", "upvote",
    "Linear", "SLERP", "Nearswap", "Task Arithmetic", "Task_Arithmetic",
    "TIES", "DARE", "Passthrough", "Model Breadcrumbs", "Model Stock",
    "NuSLERP", "DELLA", "DELLA Task Arithmetic", "SCE",
]

# Characters treated as markdown markup when measuring visual noise.
# Each symbol is listed exactly once so no character is counted twice.
MARKDOWN_SYMBOLS = [
    "#", "*", "_", "`", ">", "-", "+", "[", "]", "(", ")", "!", "\\",
    "|", "~", "<", "=", ":",
]


def count_schizo_words(text):
    """Count whole-term, case-insensitive occurrences of the schizo words.

    Each entry of ``SCHIZO_WORDS`` is searched independently, so a phrase such
    as "vibe check" contributes once for "vibe" and once for "vibe check".
    A term only matches when it is not embedded in a longer word, which keeps
    "TIES" from firing on "capabilities" or "Edge" on "knowledge".

    Args:
        text: The model-card text to scan.

    Returns:
        The summed number of matches over all terms (0 for empty text).
    """
    total = 0
    for word in SCHIZO_WORDS:
        # Look-arounds rather than \b: several terms start or end with
        # punctuation/digits ("claude-3.5", "4chan"), and the look-arounds only
        # require that no word character touches either side of the term.
        pattern = rf"(?<!\w){re.escape(word)}(?!\w)"
        total += len(re.findall(pattern, text, re.IGNORECASE))
    return total
def count_markdown_symbols(text):
    """Return how many markdown-symbol characters ``text`` contains.

    Every entry of ``MARKDOWN_SYMBOLS`` is tallied with ``str.count`` and the
    per-symbol tallies are added together.
    """
    return sum(text.count(marker) for marker in MARKDOWN_SYMBOLS)
def calculate_word_count(text):
    """Return the number of ``\\w+`` runs (word-like tokens) found in ``text``."""
    tokens = re.finditer(r'\w+', text)
    return sum(1 for _ in tokens)
def calculate_schizo_rating(readme_content):
    """Score a model card and return the individual and combined ratings.

    The base score is 10 points per schizo-word hit. Two surcharges, each a
    multiple of the base score, are then added:

    * wordiness: x0.5 for cards under 150 words or over 1000 words, x0.75 over
      1500 words, x1.0 over 2000 words plus x0.25 for each further full
      500 words;
    * visual: x0.25 for more than 100 markdown symbols, x0.5 for more than
      150 plus x0.25 for each further full 50 symbols.

    Returns:
        A dict with the keys ``combined``, ``word``, ``wordiness``, ``visual``,
        ``schizo_word_count``, ``word_count`` and ``markdown_count``.
    """
    schizo_word_count = count_schizo_words(readme_content)
    word_count = calculate_word_count(readme_content)
    markdown_count = count_markdown_symbols(readme_content)

    base_score = schizo_word_count * 10

    # Wordiness surcharge: very short and very long cards are both penalised.
    if word_count < 150:
        wordiness_score = base_score * 0.5
    elif word_count > 2000:
        overflow_steps = (word_count - 2000) // 500
        wordiness_score = base_score * (1.0 + overflow_steps * 0.25)
    elif word_count > 1500:
        wordiness_score = base_score * 0.75
    elif word_count > 1000:
        wordiness_score = base_score * 0.5
    else:
        wordiness_score = 0

    # Visual surcharge: grows with the amount of markdown markup.
    if markdown_count > 150:
        overflow_steps = (markdown_count - 150) // 50
        visual_score = base_score * (0.5 + overflow_steps * 0.25)
    elif markdown_count > 100:
        visual_score = base_score * 0.25
    else:
        visual_score = 0

    return {
        "combined": base_score + wordiness_score + visual_score,
        "word": base_score,
        "wordiness": wordiness_score,
        "visual": visual_score,
        "schizo_word_count": schizo_word_count,
        "word_count": word_count,
        "markdown_count": markdown_count
    }
def fetch_model_readme(model_id, timeout=10):
    """Download the raw ``README.md`` of a model repository.

    Args:
        model_id: Repository id that is interpolated into the download URL.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block indefinitely, which would stall
            the whole leaderboard build on a single unresponsive request.

    Returns:
        The README text when the server answers with HTTP 200, otherwise
        ``None`` (non-200 status, timeout, or any other request failure).
    """
    url = f"https://huggingface.co/{model_id}/raw/main/README.md"
    try:
        response = requests.get(url, timeout=timeout)
        if response.status_code == 200:
            return response.text
        return None
    except Exception as e:
        # Best effort: one unreachable README must not abort the whole run,
        # so the failure is logged and reported to the caller as "no README".
        print(f"Error fetching README for {model_id}: {e}")
        return None
def generate_leaderboard_data(num_models=100, model_type="all"):
    """Build the leaderboard rows by scoring model-card READMEs.

    Args:
        num_models: Maximum number of models to include in the result.
        model_type: ``"llm"`` restricts the listing to the
            ``text-generation`` task; any other value lists all models.

    Returns:
        A list of dicts (``model_id``, ``combined_rating``, ``word_rating``,
        ``wordiness_rating``, ``visual_rating``, ``schizo_word_count``,
        ``word_count``, ``markdown_count``) sorted by ``combined_rating`` in
        descending order.
    """
    # List through the authenticated client so HF_TOKEN is actually applied;
    # previously the client was created and then ignored.
    api = HfApi(token=HF_TOKEN)

    # The value arrives from a UI slider and may not be an int — coerce it
    # before it is used as a listing limit.
    num_models = int(num_models)

    model_filter = ModelFilter(task="text-generation") if model_type == "llm" else None

    # Get more models than needed to account for ones without READMEs.
    models = api.list_models(filter=model_filter, limit=num_models * 5)

    leaderboard_data = []
    for model in models:
        if len(leaderboard_data) >= num_models:
            break

        model_id = model.id
        readme_content = fetch_model_readme(model_id)
        if readme_content is None or not readme_content.strip():
            # Skip models without a usable README.
            continue

        ratings = calculate_schizo_rating(readme_content)
        leaderboard_data.append({
            "model_id": model_id,
            "combined_rating": ratings["combined"],
            "word_rating": ratings["word"],
            "wordiness_rating": ratings["wordiness"],
            "visual_rating": ratings["visual"],
            "schizo_word_count": ratings["schizo_word_count"],
            "word_count": ratings["word_count"],
            "markdown_count": ratings["markdown_count"]
        })

    # Highest combined rating first.
    leaderboard_data.sort(key=lambda x: x["combined_rating"], reverse=True)
    return leaderboard_data
def create_leaderboard_html(leaderboard_data):
    """Render the leaderboard rows as an HTML table with a client-side sorter.

    Args:
        leaderboard_data: Iterable of dicts providing ``model_id``,
            ``combined_rating``, ``visual_rating``, ``wordiness_rating`` and
            ``word_rating``.

    Returns:
        An HTML string: the table header, one ``<tr>`` per entry (ratings
        formatted with two decimals) and a ``<script>`` defining ``sortTable``.
    """
    # Imported locally under a distinct name so the stdlib module does not
    # clash with any variable called ``html``.
    from html import escape

    # NOTE(review): the "Average Schizo Rating" column shows combined_rating and
    # the "Overall Schizo Rating" column shows word_rating — confirm the labels.
    header = """
    <div class="leaderboard-container">
        <table id="leaderboard">
            <tr>
                <th onclick="sortTable(0)">Model</th>
                <th onclick="sortTable(1, true)">Average Schizo Rating</th>
                <th onclick="sortTable(2, true)">Visual Schizo Rating</th>
                <th onclick="sortTable(3, true)">Wordiness Schizo Rating</th>
                <th onclick="sortTable(4, true)">Overall Schizo Rating</th>
            </tr>
    """

    # model_id originates from an external listing, so it is escaped before
    # being placed in markup; the numeric cells are produced by formatting.
    rows = [
        f"""
            <tr>
                <td>{escape(str(item["model_id"]))}</td>
                <td>{item["combined_rating"]:.2f}</td>
                <td>{item["visual_rating"]:.2f}</td>
                <td>{item["wordiness_rating"]:.2f}</td>
                <td>{item["word_rating"]:.2f}</td>
            </tr>
        """
        for item in leaderboard_data
    ]

    footer = """
        </table>
    </div>
    <script>
    function sortTable(n, isNumeric = false) {
        var table, rows, switching, i, x, y, shouldSwitch, dir, switchcount = 0;
        table = document.getElementById("leaderboard");
        switching = true;
        dir = "asc";
        while (switching) {
            switching = false;
            rows = table.rows;
            for (i = 1; i < (rows.length - 1); i++) {
                shouldSwitch = false;
                x = rows[i].getElementsByTagName("TD")[n];
                y = rows[i + 1].getElementsByTagName("TD")[n];
                if (dir == "asc") {
                    if (isNumeric) {
                        if (parseFloat(x.innerHTML) > parseFloat(y.innerHTML)) {
                            shouldSwitch = true;
                            break;
                        }
                    } else {
                        if (x.innerHTML.toLowerCase() > y.innerHTML.toLowerCase()) {
                            shouldSwitch = true;
                            break;
                        }
                    }
                } else if (dir == "desc") {
                    if (isNumeric) {
                        if (parseFloat(x.innerHTML) < parseFloat(y.innerHTML)) {
                            shouldSwitch = true;
                            break;
                        }
                    } else {
                        if (x.innerHTML.toLowerCase() < y.innerHTML.toLowerCase()) {
                            shouldSwitch = true;
                            break;
                        }
                    }
                }
            }
            if (shouldSwitch) {
                rows[i].parentNode.insertBefore(rows[i + 1], rows[i]);
                switching = true;
                switchcount++;
            } else {
                if (switchcount == 0 && dir == "asc") {
                    dir = "desc";
                    switching = true;
                }
            }
        }
    }
    </script>
    """

    # Single join instead of repeated string concatenation in the loop.
    return "".join([header, *rows, footer])
@spaces.GPU()
def update_leaderboard(num_models, model_type):
    """Regenerate the leaderboard and return it as HTML.

    Collects fresh rows for ``num_models`` models of the given ``model_type``
    and renders them into the table markup shown in the UI.
    """
    return create_leaderboard_html(generate_leaderboard_data(num_models, model_type))
 with gr.Blocks(css=CSS, theme="soft") as demo:
     gr.HTML(TITLE)
     gr.DuplicateButton(value="Duplicate Space for private use", elem_classes="duplicate-button")
+    with gr.Row():
+        with gr.Column():
+            num_models_slider = gr.Slider(
+                minimum=10,
+                maximum=200,
+                step=10,
+                value=50,
+                label="Number of Models to Analyze",
+            )
+            model_type_dropdown = gr.Dropdown(
+                choices=["all", "llm"],
+                value="llm",
+                label="Model Type Filter",
+            )
+            update_button = gr.Button("Update Leaderboard")
+    leaderboard_html = gr.HTML()
+    update_button.click(
+        fn=update_leaderboard,
+        inputs=[num_models_slider, model_type_dropdown],
+        outputs=[leaderboard_html],
     )
 if __name__ == "__main__":
+    demo.launch()