Terry Zhuo
		
	committed on
		
		
					Commit 
							
							·
						
						c3c5af3
	
1
								Parent(s):
							
							7d73426
								
update
Browse files
- app.py +2 -2
- src/display/about.py +3 -2
- src/envs.py +1 -1
    	
        app.py
    CHANGED
    
    | @@ -390,7 +390,7 @@ with main_block as demo: | |
| 390 | 
             
                            gr.Markdown(
         | 
| 391 | 
             
                                """
         | 
| 392 | 
             
                            **Notes:**
         | 
| 393 | 
            -
                            - For the  | 
| 394 | 
             
                            - _Hard Set_ vs _Full Set_:
         | 
| 395 | 
             
                                - <u>Hard Set</u>: A subset of ~150 BigCodeBench tasks which is more user-facing and challenging.
         | 
| 396 | 
             
                                - <u>Full Set</u>: The full set of 1140 BigCodeBench tasks.
         | 
| @@ -524,7 +524,7 @@ with main_block as demo: | |
| 524 | 
             
                        )
         | 
| 525 |  | 
| 526 | 
             
                    with gr.TabItem("🛠️ Code Execution (Beta)", id=5):
         | 
| 527 | 
            -
                        gr.Markdown("## Upload your sanitized JSONL file | 
| 528 |  | 
| 529 | 
             
                        with gr.Row():
         | 
| 530 | 
             
                            jsonl_file = gr.File(label="Upload JSONL file", file_types=[".jsonl"])
         | 
|  | |
| 390 | 
             
                            gr.Markdown(
         | 
| 391 | 
             
                                """
         | 
| 392 | 
             
                            **Notes:**
         | 
| 393 | 
            +
                            - For the limited compute, we now update the Hard Set leaderboard. (**We are open to sponsorship for more compute!**)
         | 
| 394 | 
             
                            - _Hard Set_ vs _Full Set_:
         | 
| 395 | 
             
                                - <u>Hard Set</u>: A subset of ~150 BigCodeBench tasks which is more user-facing and challenging.
         | 
| 396 | 
             
                                - <u>Full Set</u>: The full set of 1140 BigCodeBench tasks.
         | 
|  | |
| 524 | 
             
                        )
         | 
| 525 |  | 
| 526 | 
             
                    with gr.TabItem("🛠️ Code Execution (Beta)", id=5):
         | 
| 527 | 
            +
                        gr.Markdown("## Upload your [sanitized JSONL file](https://github.com/bigcode-project/bigcodebench?tab=readme-ov-file#code-post-processing) to evaluate\n\n### Hard Set Ground Truth Pass Rate: 100%\n### Full Set Ground Truth Pass Rate: 99.6%")
         | 
| 528 |  | 
| 529 | 
             
                        with gr.Row():
         | 
| 530 | 
             
                            jsonl_file = gr.File(label="Upload JSONL file", file_types=[".jsonl"])
         | 
    	
        src/display/about.py
    CHANGED
    
    | @@ -143,6 +143,7 @@ CITATION_BUTTON_TEXT = r""" | |
| 143 | 
             
            """
         | 
| 144 |  | 
| 145 | 
             
            SUBMISSION_TEXT_3="""
         | 
| 146 | 
            -
            ## We welcome the community to request for new models to be added to the leaderboard.
         | 
| 147 | 
            -
            ##  | 
|  | |
| 148 | 
             
            """
         | 
|  | |
| 143 | 
             
            """
         | 
| 144 |  | 
| 145 | 
             
            SUBMISSION_TEXT_3="""
         | 
| 146 | 
            +
            ## We welcome the community to submit the evaluation results or request for new models to be added to the leaderboard.
         | 
| 147 | 
            +
            ## To submit the evaluation results, please send us your (1) raw generations, (2) sanitized generations, (3) execution logs, and (4) pass rate results to our [email](mailto:terry.[email protected]). We will review and add the results to the leaderboard as soon as possible.
         | 
| 148 | 
            +
            ## To request for the new model evaluation, please [file an issue](https://github.com/bigcode-project/bigcodebench/issues/new/choose) to add the model to the leaderboard or [start a discussion](https://huggingface.co/spaces/bigcode/bigcodebench-leaderboard/discussions/new) in the community 🤗
         | 
| 149 | 
             
            """
         | 
    	
        src/envs.py
    CHANGED
    
    | @@ -4,7 +4,7 @@ from huggingface_hub import HfApi | |
| 4 | 
             
            # clone / pull the lmeh eval data
         | 
| 5 | 
             
            HF_TOKEN = os.environ.get("HF_TOKEN", None)
         | 
| 6 |  | 
| 7 | 
            -
            DATA_VERSION = "v0.1. | 
| 8 |  | 
| 9 | 
             
            REPO_ID = "bigcode/bigcodebench-leaderboard"
         | 
| 10 | 
             
            QUEUE_REPO = "bigcode/bigcodebench-requests"
         | 
|  | |
| 4 | 
             
            # clone / pull the lmeh eval data
         | 
| 5 | 
             
            HF_TOKEN = os.environ.get("HF_TOKEN", None)
         | 
| 6 |  | 
| 7 | 
            +
            DATA_VERSION = "v0.1.1_hf"
         | 
| 8 |  | 
| 9 | 
             
            REPO_ID = "bigcode/bigcodebench-leaderboard"
         | 
| 10 | 
             
            QUEUE_REPO = "bigcode/bigcodebench-requests"
         | 
