|  | import pandas as pd | 
					
						
						|  | import requests | 
					
						
						|  | import os | 
					
						
						|  |  | 
					
						
						|  | import gradio | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | import gradio as gr | 
					
						
						|  |  | 
					
						
						|  | from info.train_a_model import ( | 
					
						
						|  | LLM_BENCHMARKS_TEXT) | 
					
						
						|  | from info.submit import ( | 
					
						
						|  | SUBMIT_TEXT) | 
					
						
						|  | from info.deployment import ( | 
					
						
						|  | DEPLOY_TEXT) | 
					
						
						|  | from info.programs import ( | 
					
						
						|  | PROGRAMS_TEXT) | 
					
						
						|  | from info.citation import( | 
					
						
						|  | CITATION_TEXT) | 
					
						
						|  | from info.validated_chat_models import( | 
					
						
						|  | VALIDATED_CHAT_MODELS) | 
					
						
						|  | from info.about import( | 
					
						
						|  | ABOUT) | 
					
						
						|  | from src.processing import filter_benchmarks_table | 
					
						
						|  |  | 
					
						
						# Deployment settings come from the environment. Each lookup is a hard
						# requirement: a missing variable raises KeyError while the module loads.
						(
						    inference_endpoint_url,
						    submission_form_endpoint_url,
						    inference_concurrency_limit,
						) = [
						    os.environ[setting]
						    for setting in (
						        'inference_endpoint_url',
						        'submission_form_endpoint_url',
						        'inference_concurrency_limit',
						    )
						]
					
						
						|  |  | 
					
						
						|  | demo = gr.Blocks() | 
					
						
						|  |  | 
					
						
						|  | with demo: | 
					
						
						|  |  | 
					
						
						|  | gr.HTML("""<h1 align="center" id="space-title">๐คPowered-by-Intel LLM Leaderboard ๐ป</h1>""") | 
					
						
						|  | gr.Markdown("""This leaderboard is designed to evaluate, score, and rank open-source LLMs | 
					
						
						|  | that have been pre-trained or fine-tuned on Intel Hardware ๐ฆพ. To submit your model for evaluation, | 
					
						
						|  | follow the instructions and complete the form in the ๐๏ธ Submit tab. Models submitted to the leaderboard are evaluated | 
					
						
						|  | on the Intel Developer Cloud โ๏ธ. The evaluation platform consists of Gaudi Accelerators and Xeon CPUs running benchmarks from | 
					
						
						|  | the  [Eleuther AI Language Model Evaluation Harness](https://github.com/EleutherAI/lm-evaluation-harness).""") | 
					
						
						|  | gr.Markdown("""A special shout-out to the ๐ค [Open LLM Leaderboard](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard) | 
					
						
						|  | team for generously sharing their code and best | 
					
						
						|  | practices, ensuring that AI Developers have a valuable and enjoyable tool at their disposal.""") | 
					
						
						|  |  | 
					
						
						def submit_to_endpoint(model_name, revision_name, model_type, hw_type, terms, precision, weight_type, training_infra, affiliation, base_model):
						    """
						    Validate the submission form and POST it to the submission endpoint.

						    Every field must be filled in and ``terms`` must be accepted. A field
						    counts as missing when it is None, an empty string or whitespace only.
						    Validation runs before any network access, so an incomplete form never
						    touches the endpoint.

						    Returns a status message (string) for display in the UI. Failures
						    raised by ``requests`` are reported in that message rather than raised.
						    """
						    data = {
						        "model_name": model_name,
						        "revision_name": revision_name,
						        "model_type": model_type,
						        "hw_type": hw_type,
						        "terms": terms,
						        "precision": precision,
						        "weight_type": weight_type,
						        "training_infrastructure": training_infra,
						        "affiliation": affiliation,
						        "base_model": base_model,
						    }

						    for key, value in data.items():
						        if key == "terms":
						            # The agreement checkbox must be ticked.
						            missing = not value
						        else:
						            missing = value is None or (isinstance(value, str) and not value.strip())
						        if missing:
						            return f"\u274c Failed Submission: '{key}' ensure all fields are completed and that you have agreed to evaluation terms."

						    try:
						        # Bounded wait so an unresponsive endpoint cannot hang the UI worker.
						        response = requests.post(submission_form_endpoint_url, json=data, timeout=30)
						    except requests.RequestException as e:
						        return f"\u274cFailed to submit due to an error: {e}"

						    if response.status_code == 200:
						        return (
						            "\u2705 Submission successful! Please allow for 5 - 10 days for model "
						            "evaluation to be completed. We will contact you through your model's "
						            "discussion forum if we encounter any issues with your submission."
						        )
						    return f"Submission failed with status code {response.status_code}"
					
						
						|  |  | 
					
						
						|  | with gr.Accordion("Chat with Top Models on the Leaderboard Here ๐ฌ", open=False): | 
					
						
						|  |  | 
					
						
						|  | chat_model_dropdown = gr.Dropdown( | 
					
						
						|  | choices=VALIDATED_CHAT_MODELS, | 
					
						
						|  | label="Select a leaderboard model to chat with. ", | 
					
						
						|  | multiselect=False, | 
					
						
						|  | value=VALIDATED_CHAT_MODELS[0], | 
					
						
						|  | interactive=True, | 
					
						
						|  | ) | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | chat_model_selection = 'yuriachermann/My_AGI_llama_2_7B' | 
					
						
						|  |  | 
					
						
						def call_api_and_stream_response(query, chat_model, timeout=(10, 300)):
						    """
						    Call the inference endpoint and yield the reply one character at a time.

						    The body is read byte by byte so text is forwarded as soon as it
						    arrives. Bytes go through an incremental UTF-8 decoder, so a multi-byte
						    character is emitted only once all of its bytes are in; decoding each
						    byte on its own would raise UnicodeDecodeError on any non-ASCII reply.
						    Malformed byte sequences are replaced with U+FFFD.

						    ``timeout`` is passed to ``requests.get`` unchanged; the default is a
						    (connect, read) pair in seconds.
						    """
						    import codecs

						    params = {"query": query, "selected_model": chat_model}
						    decoder = codecs.getincrementaldecoder("utf-8")(errors="replace")
						    with requests.get(inference_endpoint_url, json=params, stream=True, timeout=timeout) as r:
						        for chunk in r.iter_content(chunk_size=1):
						            if not chunk:
						                continue
						            # Usually zero or one character; iterate so callers always
						            # receive single characters.
						            for char in decoder.decode(chunk):
						                yield char
						        # Flush a sequence left incomplete when the stream ended.
						        for char in decoder.decode(b"", final=True):
						            yield char
					
						
						|  |  | 
					
						
						def get_response(query, history, chat_model=None):
						    """
						    Stream the model's reply to ``query`` as chatbot updates.

						    Yields a one-item list of ``(label, text_so_far)`` after each piece of
						    text arrives, and stops at the first '<' in the stream.
						    ``history`` is accepted but not used: each reply replaces the
						    chatbot's contents.
						    ``chat_model`` selects the model to query; when omitted, the
						    module-level ``chat_model_selection`` is used.
						    """
						    if chat_model is None:
						        chat_model = chat_model_selection
						    # NOTE(review): emoji restored from a mis-encoded literal; assumed to be
						    # the robot face - confirm against the original file.
						    label = f"\U0001F916 Response from LLM: {chat_model}"
						    response = ''
						    for chunk in call_api_and_stream_response(query, chat_model=chat_model):
						        # Keep only the text before '<', even if a chunk holds more than
						        # one character.
						        text, marker, _ = chunk.partition('<')
						        if text:
						            response += text
						            yield [(label, response)]
						        if marker:
						            break
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | chatbot = gr.Chatbot() | 
					
						
						|  | msg = gr.Textbox() | 
					
						
						|  | submit = gr.Button("Submit") | 
					
						
						|  | clear = gr.Button("Clear") | 
					
						
						def user(user_message, history):
						    """
						    Add the user's message to the chat history as an unanswered turn.

						    Returns ``("", new_history)``, where ``new_history`` is a new list
						    ending in ``[user_message, None]``. The input history is not mutated,
						    and a missing (None) history is treated as empty.
						    """
						    return "", [*(history or []), [user_message, None]]
					
						
						def clear_chat(*_ignored):
						    """Return a new empty history; any positional arguments are ignored."""
						    return list()
					
						
						|  | submit.click( | 
					
						
						|  | fn=get_response, | 
					
						
						|  | inputs=[msg, chatbot], | 
					
						
						|  | outputs=chatbot | 
					
						
						|  | ) | 
					
						
						|  | clear.click( | 
					
						
						|  | fn=clear_chat, | 
					
						
						|  | inputs=None, | 
					
						
						|  | outputs=chatbot | 
					
						
						|  | ) | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | with gr.Tabs(elem_classes="tab-buttons") as tabs: | 
					
						
						|  | with gr.TabItem("๐ LLM Leaderboard", elem_id="llm-benchmark-table", id=0): | 
					
						
						|  | with gr.Row(): | 
					
						
						|  | with gr.Column(): | 
					
						
						|  | filter_hw = gr.CheckboxGroup(choices=["Gaudi","Xeon","GPU Max","Arc GPU","Core Ultra"], | 
					
						
						|  | label="Select Training Platform*", | 
					
						
						|  | elem_id="compute_platforms", | 
					
						
						|  | value=["Gaudi","Xeon","GPU Max","Arc GPU","Core Ultra"]) | 
					
						
						|  | filter_platform = gr.CheckboxGroup(choices=["Intel Developer Cloud","AWS","Azure","Google Cloud Platform","Local"], | 
					
						
						|  | label="Training Infrastructure*", | 
					
						
						|  | elem_id="training_infra", | 
					
						
						|  | value=["Intel Developer Cloud","AWS","Azure","Google Cloud Platform","Local"]) | 
					
						
						|  | filter_affiliation = gr.CheckboxGroup(choices=["No Affiliation","Intel Innovator","Student Ambassador","Intel Liftoff", "Intel Engineering", "Other"], | 
					
						
						|  | label="Intel Program Affiliation", | 
					
						
						|  | elem_id="program_affiliation", | 
					
						
						|  | value=["No Affiliation","Intel Innovator","Student Ambassador","Intel Liftoff", "Intel Engineering", "Other"]) | 
					
						
						|  |  | 
					
						
						|  | with gr.Column(): | 
					
						
						|  | filter_size = gr.CheckboxGroup(choices=[1,2,3,5,7,13,35,60,70,100], | 
					
						
						|  | label="Model Sizes (Billion of Parameters)", | 
					
						
						|  | elem_id="parameter_size", | 
					
						
						|  | value=[1,2,3,5,7,13,35,60,70,100]) | 
					
						
						|  | filter_precision = gr.CheckboxGroup(choices=["fp32","fp16","bf16","int8","fp8", "int4"], | 
					
						
						|  | label="Model Precision", | 
					
						
						|  | elem_id="precision", | 
					
						
						|  | value=["fp32","fp16","bf16","int8","fp8", "int4"]) | 
					
						
						|  | filter_type = gr.CheckboxGroup(choices=["pretrained","fine-tuned","chat-models","merges/moerges"], | 
					
						
						|  | label="Model Types", | 
					
						
						|  | elem_id="model_types", | 
					
						
						|  | value=["pretrained","fine-tuned","chat-models","merges/moerges"]) | 
					
						
						|  | inbox_text = gr.CheckboxGroup(label = """Inference Tested Column Legend: ๐จ = Gaudi, ๐ฆ = Xeon, ๐ฅ = GPU Max, ๐  = Core Ultra, ๐ข = Arc GPU     (Please see "โAbout" tab for more info)""") | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						# Link colour used for model names in the leaderboard table.
						color = '#2f82d4'


						def make_clickable(row):
						    """
						    Build an HTML link to the model's Hugging Face page.

						    ``row`` is any mapping with a "Model" key. The value is HTML-escaped
						    before being placed in the href and the link text, so characters such
						    as quotes or angle brackets cannot break out of the markup. Ordinary
						    model ids are unaffected by the escaping.
						    """
						    import html

						    model_id = html.escape(str(row["Model"]), quote=True)
						    return (
						        f'<a href="https://huggingface.co/{model_id}" target="_blank" '
						        f'style="color: {color}; text-decoration: underline;">{model_id}</a>'
						    )
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						# Load the leaderboard snapshot, replace each model name with its HTML
						# link, and order rows by descending 'Average'.
						initial_df = (
						    pd.read_csv("./status/leaderboard_status_060524.csv")
						    .assign(Model=lambda frame: frame.apply(make_clickable, axis=1))
						    .sort_values(by='Average', ascending=False)
						)
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						def update_df(hw_selected, platform_selected, affiliation_selected, size_selected, precision_selected, type_selected):
						    """
						    Return the leaderboard table narrowed to the given selections.

						    Passes the module-level ``initial_df`` and the six selections to
						    ``filter_benchmarks_table`` and returns its result unchanged.
						    """
						    selections = {
						        "hw_selected": hw_selected,
						        "platform_selected": platform_selected,
						        "affiliation_selected": affiliation_selected,
						        "size_selected": size_selected,
						        "precision_selected": precision_selected,
						        "type_selected": type_selected,
						    }
						    return filter_benchmarks_table(df=initial_df, **selections)
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | initial_filtered_df = update_df(["Gaudi","Xeon","GPU Max","Arc GPU","Core Ultra"], | 
					
						
						|  | ["Intel Developer Cloud","AWS","Azure","Google Cloud Platform","Local"], | 
					
						
						|  | ["No Affiliation","Intel Innovator","Student Ambassador","Intel Liftoff", "Intel Engineering", "Other"], | 
					
						
						|  | [1,2,3,5,7,13,35,60,70,100], | 
					
						
						|  | ["fp32","fp16","bf16","int8","fp8", "int4"], | 
					
						
						|  | ["pretrained","fine-tuned","chat-models","merges/moerges"]) | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | gradio_df_display = gr.Dataframe(value=initial_filtered_df, headers=["Inference Tested","Model","Average","ARC","HellaSwag","MMLU", | 
					
						
						|  | "TruthfulQA","Winogrande","Training Hardware","Model Type","Precision", | 
					
						
						|  | "Size","Infrastructure","Affiliation"], | 
					
						
						|  | datatype=["html","html","str","str","str","str","str","str","str","str","str","str","str","str"]) | 
					
						
						|  |  | 
					
						
						# Changing any filter recomputes the table from the current state of all
						# six filters, so every control registers the same handler and inputs.
						filter_controls = [filter_hw, filter_platform, filter_affiliation, filter_size, filter_precision, filter_type]
						for filter_control in filter_controls:
						    filter_control.change(fn=update_df,
						                          inputs=filter_controls,
						                          outputs=[gradio_df_display])
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | with gr.TabItem("๐งฐ Train a Model", elem_id="getting-started", id=1): | 
					
						
						|  | gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text") | 
					
						
						|  | with gr.TabItem("๐ Deployment Tips", elem_id="deployment-tips", id=2): | 
					
						
						|  | gr.Markdown(DEPLOY_TEXT, elem_classes="markdown-text") | 
					
						
						|  | with gr.TabItem("๐ฉโ๐ป Developer Programs", elem_id="hardward-program", id=3): | 
					
						
						|  | gr.Markdown(PROGRAMS_TEXT, elem_classes="markdown-text") | 
					
						
						|  | with gr.TabItem("โ About ", elem_id="about", id=5): | 
					
						
						|  | gr.Markdown(ABOUT, elem_classes="markdown-text") | 
					
						
						|  | with gr.TabItem("๐๏ธ Submit", elem_id="submit", id=4): | 
					
						
						|  | gr.Markdown(SUBMIT_TEXT, elem_classes="markdown-text") | 
					
						
						|  | with gr.Row(): | 
					
						
						|  | gr.Markdown("# Submit Model for Evaluation ๐๏ธ", elem_classes="markdown-text") | 
					
						
						|  | with gr.Row(): | 
					
						
						|  | with gr.Column(): | 
					
						
						|  | model_name_textbox = gr.Textbox(label="Model name", | 
					
						
						|  | info = """ Name of Model in the Hub. For example: 'Intel/neural-chat-7b-v1-1'""",) | 
					
						
						|  | revision_name_textbox = gr.Textbox(label="Revision commit (Branch)", placeholder="main") | 
					
						
						|  | model_type = gr.Dropdown( | 
					
						
						|  | choices=["pretrained","fine-tuned","chat models","merges/moerges"], | 
					
						
						|  | label="Model type", | 
					
						
						|  | multiselect=False, | 
					
						
						|  | value="pretrained", | 
					
						
						|  | interactive=True, | 
					
						
						|  | ) | 
					
						
						|  |  | 
					
						
						|  | hw_type = gr.Dropdown( | 
					
						
						|  | choices=["Gaudi","Xeon","GPU Max","Arc GPU","Core Ultra"], | 
					
						
						|  | label="Training Hardware", | 
					
						
						|  | multiselect=False, | 
					
						
						|  | value="Gaudi", | 
					
						
						|  | interactive=True, | 
					
						
						|  | ) | 
					
						
						|  | terms = gr.Checkbox( | 
					
						
						|  | label="Check if you agree to having your model evaluated and published to the leaderboard by our team.", | 
					
						
						|  | value=False, | 
					
						
						|  | interactive=True, | 
					
						
						|  | ) | 
					
						
						|  | submit_button = gr.Button("๐ค Submit Eval ๐ป") | 
					
						
						|  | submission_result = gr.Markdown() | 
					
						
						|  |  | 
					
						
						|  | with gr.Column(): | 
					
						
						|  | precision = gr.Dropdown( | 
					
						
						|  | choices=["fp32","fp16","bf16","int8","fp8", "int4"], | 
					
						
						|  | label="Precision", | 
					
						
						|  | multiselect=False, | 
					
						
						|  | value="fp16", | 
					
						
						|  | interactive=True, | 
					
						
						|  | ) | 
					
						
						|  | weight_type = gr.Dropdown( | 
					
						
						|  | choices=["Original", "Adapter", "Delta"], | 
					
						
						|  | label="Weights type", | 
					
						
						|  | multiselect=False, | 
					
						
						|  | value="Original", | 
					
						
						|  | interactive=True, | 
					
						
						|  | info = """ Select the appropriate weights. If you have fine-tuned or adapted a model with PEFT or Delta-Tuning you likely have | 
					
						
						|  | LoRA Adapters or Delta Weights.""", | 
					
						
						|  | ) | 
					
						
						|  | training_infra = gr.Dropdown( | 
					
						
						|  | choices=["Intel Developer Cloud","AWS","Azure","Google Cloud Platform","Local"], | 
					
						
						|  | label="Training Infrastructure", | 
					
						
						|  | multiselect=False, | 
					
						
						|  | value="Intel Developer Cloud", | 
					
						
						|  | interactive=True, | 
					
						
						|  | info = """ Select the infrastructure that the model was developed on. | 
					
						
						|  | Local is the ideal choice for Core Ultra, ARC GPUs, and local data center infrastructure.""", | 
					
						
						|  | ) | 
					
						
						|  | affiliation = gr.Dropdown( | 
					
						
						|  | choices=["No Affiliation","Intel Innovator","Student Ambassador","Intel Liftoff", "Intel Engineering", "Other"], | 
					
						
						|  | label="Affiliation with Intel", | 
					
						
						|  | multiselect=False, | 
					
						
						|  | value="No Affiliation", | 
					
						
						|  | interactive=True, | 
					
						
						|  | info = """ Select "No Affiliation" if not part of any Intel programs.""", | 
					
						
						|  | ) | 
					
						
						|  | base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)") | 
					
						
						|  |  | 
					
						
						|  | submit_button.click( | 
					
						
						|  | fn=submit_to_endpoint, | 
					
						
						|  | inputs=[model_name_textbox, revision_name_textbox, model_type, hw_type, terms, precision, weight_type, training_infra, affiliation, base_model_name_textbox], | 
					
						
						|  | outputs=submission_result) | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | with gr.Accordion("๐ Citation", open=False): | 
					
						
						|  | citation =gr.Textbox(value = CITATION_TEXT, | 
					
						
						|  | lines=6, | 
					
						
						|  | label="Use the following to cite this content") | 
					
						
						|  |  | 
					
						
						|  | gr.Markdown("""<div style="display: flex; justify-content: center;"> <p> Intel, the Intel logo and Gaudi are trademarks of Intel Corporation or its subsidiaries. | 
					
						
						|  | *Other names and brands may be claimed as the property of others. | 
					
						
						|  | </p> </div>""") | 
					
						
						|  | demo.queue() | 
					
						
						|  | demo.launch(share=False) |